/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_x86.h"

#include "art_method.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
#include "intrinsics.h"
#include "intrinsics_x86.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "thread.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
#include "utils/x86/assembler_x86.h"
#include "utils/x86/managed_register_x86.h"

namespace art {

template<class MirrorType>
class GcRoot;

namespace x86 {

static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = EAX;
static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };

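// Bit 10 (the C2 condition flag) of the x87 FPU status word; the FP remainder
// code tests this bit after FPREM to decide whether another reduction round is
// still needed.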
static constexpr int kC2ConditionMask = 0x400;

static constexpr int kFakeReturnRegister = Register(8);

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()

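// The slow path classes below emit out-of-line code: the main code stream jumps
// to GetEntryLabel() for the uncommon case and, unless the path is fatal,
// resumes at GetExitLabel() (or a designated successor block) afterwards.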
class NullCheckSlowPathX86 : public SlowPathCode {
 public:
  explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    x86_codegen->InvokeRuntime(kQuickThrowNullPointer,
                               instruction_,
                               instruction_->GetDexPc(),
                               this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
};

class DivZeroCheckSlowPathX86 : public SlowPathCode {
 public:
  explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
};

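// Handles the overflowing corner case of integer division: on x86, IDIV of
// kMinInt by -1 raises a #DE fault instead of wrapping, so the main code jumps
// here and the result is materialized directly (negation for div, 0 for rem).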
class DivRemMinusOneSlowPathX86 : public SlowPathCode {
 public:
  DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div)
      : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    __ Bind(GetEntryLabel());
    if (is_div_) {
      __ negl(reg_);
    } else {
      __ movl(reg_, Immediate(0));
    }
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86"; }

 private:
  Register reg_;
  bool is_div_;
  DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
};

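// Throws ArrayIndexOutOfBoundsException (or StringIndexOutOfBoundsException for
// String.charAt). The offending index and the length are moved into the runtime
// calling convention registers; if the HArrayLength was emitted at its use site,
// the length is (re)loaded from memory here and shifted right by one to strip
// the flag bit when string compression is enabled.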
class BoundsCheckSlowPathX86 : public SlowPathCode {
 public:
  explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }

    // Are we using an array length from memory?
    HInstruction* array_length = instruction_->InputAt(1);
    Location length_loc = locations->InAt(1);
    InvokeRuntimeCallingConvention calling_convention;
    if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
      // Load the array length into our temporary.
      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
      Location array_loc = array_length->GetLocations()->InAt(0);
      Address array_len(array_loc.AsRegister<Register>(), len_offset);
      length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
      // Check for conflicts with index.
      if (length_loc.Equals(locations->InAt(0))) {
        // We know we aren't using parameter 2.
        length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
      }
      __ movl(length_loc.AsRegister<Register>(), array_len);
      if (mirror::kUseStringCompression) {
        __ shrl(length_loc.AsRegister<Register>(), Immediate(1));
      }
    }
    x86_codegen->EmitParallelMoves(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        Primitive::kPrimInt,
        length_loc,
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        Primitive::kPrimInt);
    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
        ? kQuickThrowStringBounds
        : kQuickThrowArrayBounds;
    x86_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
};

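// Calls the pTestSuspend runtime entrypoint so the thread can be suspended at a
// safepoint. Afterwards control either returns to the instruction following the
// check (successor_ == nullptr) or jumps to the given loop successor block.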
class SuspendCheckSlowPathX86 : public SlowPathCode {
 public:
  SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCode(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
    x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
    if (successor_ == nullptr) {
      __ jmp(GetReturnLabel());
    } else {
      __ jmp(x86_codegen->GetLabelOf(successor_));
    }
  }

  Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86"; }

 private:
  HBasicBlock* const successor_;
  Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86);
};

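// Resolves a String through the runtime when its .bss slot is still empty, then
// writes the result back into that slot (via a patched PC-relative address) so
// later executions of the HLoadString hit the cached reference.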
class LoadStringSlowPathX86 : public SlowPathCode {
 public:
  explicit LoadStringSlowPathX86(HLoadString* instruction): SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
    __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index.index_));
    x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
    RestoreLiveRegisters(codegen, locations);

    // Store the resolved String to the BSS entry.
    Register method_address = locations->InAt(0).AsRegister<Register>();
    __ movl(Address(method_address, CodeGeneratorX86::kDummy32BitOffset),
            locations->Out().AsRegister<Register>());
    Label* fixup_label = x86_codegen->NewStringBssEntryPatch(instruction_->AsLoadString());
    __ Bind(fixup_label);

    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86);
};

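// Resolves (and, for HClinitCheck, initializes) a class through the runtime.
// `at` is the instruction requiring the slow path, which may be the HLoadClass
// itself or a separate HClinitCheck.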
class LoadClassSlowPathX86 : public SlowPathCode {
 public:
  LoadClassSlowPathX86(HLoadClass* cls,
                       HInstruction* at,
                       uint32_t dex_pc,
                       bool do_clinit)
      : SlowPathCode(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    dex::TypeIndex type_index = cls_->GetTypeIndex();
    __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_));
    x86_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage
                                          : kQuickInitializeType,
                               instruction_,
                               dex_pc_,
                               this);
    if (do_clinit_) {
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
    } else {
      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
    }

    // Move the class to the desired location.
    Location out = locations->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      x86_codegen->Move32(out, Location::RegisterLocation(EAX));
    }
    RestoreLiveRegisters(codegen, locations);
    // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
    if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
      DCHECK(out.IsValid());
      Register method_address = locations->InAt(0).AsRegister<Register>();
      __ movl(Address(method_address, CodeGeneratorX86::kDummy32BitOffset),
              locations->Out().AsRegister<Register>());
      Label* fixup_label = x86_codegen->NewTypeBssEntryPatch(cls_);
      __ Bind(fixup_label);
    }
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  // The dex PC of `at`, the instruction requiring this slow path.
  const uint32_t dex_pc_;

  // Whether to initialize the class.
  const bool do_clinit_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86);
};

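// Shared slow path for HInstanceOf and HCheckCast. A non-fatal path saves and
// restores live registers and returns the instanceof result in EAX; a fatal
// path simply throws via the runtime and never comes back.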
class TypeCheckSlowPathX86 : public SlowPathCode {
 public:
  TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal)
      : SlowPathCode(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());

    if (!is_fatal_) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    x86_codegen->EmitParallelMoves(locations->InAt(0),
                                   Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                                   Primitive::kPrimNot,
                                   locations->InAt(1),
                                   Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                                   Primitive::kPrimNot);
    if (instruction_->IsInstanceOf()) {
      x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
    } else {
      DCHECK(instruction_->IsCheckCast());
      x86_codegen->InvokeRuntime(kQuickCheckInstanceOf,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      if (instruction_->IsInstanceOf()) {
        x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
      }
      RestoreLiveRegisters(codegen, locations);

      __ jmp(GetExitLabel());
    }
  }

  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86"; }
  bool IsFatal() const OVERRIDE { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
};

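// Transfers execution back to the interpreter by calling the deoptimization
// entrypoint with the reason for deoptimizing; the call does not return here.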
class DeoptimizationSlowPathX86 : public SlowPathCode {
 public:
  explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConvention calling_convention;
    x86_codegen->Load32BitValue(
        calling_convention.GetRegisterAt(0),
        static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
};

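// Used for aput-object when the element type check cannot be done inline: the
// array, index and value are moved to the runtime calling convention registers
// and the kQuickAputObject entrypoint performs the type check and the store.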
class ArraySetSlowPathX86 : public SlowPathCode {
 public:
  explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
    parallel_move.AddMove(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        Primitive::kPrimNot,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        Primitive::kPrimInt,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
        Primitive::kPrimNot,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    x86_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
};

// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
// reference does not get updated by this slow path after marking (see
// ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that).
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
 public:
  ReadBarrierMarkSlowPathX86(HInstruction* instruction,
                             Location ref,
                             bool unpoison_ref_before_marking)
      : SlowPathCode(instruction),
        ref_(ref),
        unpoison_ref_before_marking_(unpoison_ref_before_marking) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86"; }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Register ref_reg = ref_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsArraySet() ||
           instruction_->IsLoadClass() ||
           instruction_->IsLoadString() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_reg);
    }
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    DCHECK_NE(ref_reg, ESP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // and output in EAX):
    //
    //   EAX <- ref
    //   EAX <- ReadBarrierMark(EAX)
    //   ref <- EAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset =
        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
};

// Slow path marking an object reference `ref` during a read barrier,
// and if needed, atomically updating the field `obj.field` in the
// object `obj` holding this reference after marking (contrary to
// ReadBarrierMarkSlowPathX86 above, which never tries to update
// `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
 public:
  ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction,
                                           Location ref,
                                           Register obj,
                                           const Address& field_addr,
                                           bool unpoison_ref_before_marking,
                                           Register temp)
      : SlowPathCode(instruction),
        ref_(ref),
        obj_(obj),
        field_addr_(field_addr),
        unpoison_ref_before_marking_(unpoison_ref_before_marking),
        temp_(temp) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Register ref_reg = ref_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    // This slow path is only used by the UnsafeCASObject intrinsic.
    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking and field updating slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_reg);
    }

    // Save the old (unpoisoned) reference.
    __ movl(temp_, ref_reg);

    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    DCHECK_NE(ref_reg, ESP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // and output in EAX):
    //
    //   EAX <- ref
    //   EAX <- ReadBarrierMark(EAX)
    //   ref <- EAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset =
        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);

    // If the new reference is different from the old reference,
    // update the field in the holder (`*field_addr`).
    //
    // Note that this field could also hold a different object, if
    // another thread had concurrently changed it. In that case, the
    // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
    // operation below would abort the CAS, leaving the field as-is.
    NearLabel done;
    __ cmpl(temp_, ref_reg);
    __ j(kEqual, &done);

    // Update the holder's field atomically. This may fail if the
    // mutator updates the field before us, but that is OK. This is
    // achieved using a strong compare-and-set (CAS) operation with
    // relaxed memory synchronization ordering, where the expected
    // value is the old reference and the desired value is the new
    // reference. This operation is implemented with a 32-bit LOCK
    // CMPXCHGL instruction, which requires the expected value (the
    // old reference) to be in EAX. Save EAX beforehand, and move the
    // expected value (stored in `temp_`) into EAX.
    __ pushl(EAX);
    __ movl(EAX, temp_);

    // Convenience aliases.
    Register base = obj_;
    Register expected = EAX;
    Register value = ref_reg;

    bool base_equals_value = (base == value);
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // If `base` and `value` are the same register location, move
        // `value` to a temporary register. This way, poisoning
        // `value` won't invalidate `base`.
        value = temp_;
        __ movl(value, base);
      }

      // Check that the register allocator did not assign the location
      // of `expected` (EAX) to `value` nor to `base`, so that heap
      // poisoning (when enabled) works as intended below.
      // - If `value` were equal to `expected`, both references would
      //   be poisoned twice, meaning they would not be poisoned at
      //   all, as heap poisoning uses address negation.
      // - If `base` were equal to `expected`, poisoning `expected`
      //   would invalidate `base`.
      DCHECK_NE(value, expected);
      DCHECK_NE(base, expected);

      __ PoisonHeapReference(expected);
      __ PoisonHeapReference(value);
    }

    __ LockCmpxchgl(field_addr_, value);

    // If heap poisoning is enabled, we need to unpoison the values
    // that were poisoned earlier.
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // `value` has been moved to a temporary register, no need
        // to unpoison it.
      } else {
        __ UnpoisonHeapReference(value);
      }
      // No need to unpoison `expected` (EAX), as it will be overwritten below.
    }

    // Restore EAX.
    __ popl(EAX);

    __ Bind(&done);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // The register containing the object holding the marked object reference field.
  const Register obj_;
  // The address of the marked reference field. The base of this address must be `obj_`.
  const Address field_addr_;

  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  const Register temp_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86);
};

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
 public:
  ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction,
                                         Location out,
                                         Location ref,
                                         Location obj,
                                         uint32_t offset,
                                         Location index)
      : SlowPathCode(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    DCHECK(kEmitCompilerReadBarrier);
    // If `obj` is equal to `out` or `ref`, it means the initial object
    // has been overwritten by (or after) the heap object reference load
    // to be instrumented, e.g.:
    //
    //   __ movl(out, Address(out, offset));
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    Register reg_out = out_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute the actual memory offset and store it in `index`.
        Register index_reg = index_.AsRegister<Register>();
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
          // We are about to change the value of `index_reg` (see the
          // calls to art::x86::X86Assembler::shll and
          // art::x86::X86Assembler::AddImmediate below), but it has
          // not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function. So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier. Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          Register free_reg = FindAvailableCallerSaveRegister(codegen);
          __ movl(free_reg, index_reg);
          index_reg = free_reg;
          index = Location::RegisterLocation(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the scale
        // factor (2) cannot overflow in practice, as the runtime is
        // unable to allocate object arrays with a size larger than
        // 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ shll(index_reg, Immediate(TIMES_4));
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ AddImmediate(index_reg, Immediate(offset_));
      } else {
        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
        // intrinsics, `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset
        // to an object field within an object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0U);
        DCHECK(index_.IsRegisterPair());
        // UnsafeGet's offset location is a register pair, the low
        // part contains the correct offset.
        index = index_.ToLow();
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
    parallel_move.AddMove(ref_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                          Primitive::kPrimNot,
                          nullptr);
    parallel_move.AddMove(obj_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                          Primitive::kPrimNot,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
                            Primitive::kPrimInt,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
    }
    x86_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathX86"; }

 private:
  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    size_t ref = static_cast<int>(ref_.AsRegister<Register>());
    size_t obj = static_cast<int>(obj_.AsRegister<Register>());
    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return static_cast<Register>(i);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on x86
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free caller-save register";
    UNREACHABLE();
  }

  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
};

// Slow path generating a read barrier for a GC root.
class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
 public:
  ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
      : SlowPathCode(instruction), out_(out), root_(root) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Register reg_out = out_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier for GC root slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
    x86_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
                               instruction_,
                               instruction_->GetDexPc(),
                               this);
    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86"; }

 private:
  const Location out_;
  const Location root_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86);
};

#undef __
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(GetAssembler())->  // NOLINT

inline Condition X86Condition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kLess;
    case kCondLE: return kLessEqual;
    case kCondGT: return kGreater;
    case kCondGE: return kGreaterEqual;
    case kCondB: return kBelow;
    case kCondBE: return kBelowEqual;
    case kCondA: return kAbove;
    case kCondAE: return kAboveEqual;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

// Maps signed condition to unsigned condition and FP condition to x86 name.
inline Condition X86UnsignedOrFPCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    // Signed to unsigned, and FP to x86 name.
    case kCondLT: return kBelow;
    case kCondLE: return kBelowEqual;
    case kCondGT: return kAbove;
    case kCondGE: return kAboveEqual;
    // Unsigned remain unchanged.
    case kCondB: return kBelow;
    case kCondBE: return kBelowEqual;
    case kCondA: return kAbove;
    case kCondAE: return kAboveEqual;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const {
  stream << Register(reg);
}

void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
  stream << XmmRegister(reg);
}

size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
  __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id));
  return kX86WordSize;
}

size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
  __ movl(static_cast<Register>(reg_id), Address(ESP, stack_index));
  return kX86WordSize;
}

size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  if (GetGraph()->HasSIMD()) {
    __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
  } else {
    __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
  }
  return GetFloatingPointSpillSlotSize();
}

size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  if (GetGraph()->HasSIMD()) {
    __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
  } else {
    __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
  }
  return GetFloatingPointSpillSlotSize();
}

void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint,
                                     HInstruction* instruction,
                                     uint32_t dex_pc,
                                     SlowPathCode* slow_path) {
  ValidateInvokeRuntime(entrypoint, instruction, slow_path);
  GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value());
  if (EntrypointRequiresStackMap(entrypoint)) {
    RecordPcInfo(instruction, dex_pc, slow_path);
  }
}

void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                                           HInstruction* instruction,
                                                           SlowPathCode* slow_path) {
  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
  GenerateInvokeRuntime(entry_point_offset);
}

void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
  __ fs()->call(Address::Absolute(entry_point_offset));
}

CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
                                   const X86InstructionSetFeatures& isa_features,
                                   const CompilerOptions& compiler_options,
                                   OptimizingCompilerStats* stats)
    : CodeGenerator(graph,
                    kNumberOfCpuRegisters,
                    kNumberOfXmmRegisters,
                    kNumberOfRegisterPairs,
                    ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
                                        arraysize(kCoreCalleeSaves))
                        | (1 << kFakeReturnRegister),
                    0,
                    compiler_options,
                    stats),
      block_labels_(nullptr),
      location_builder_(graph, this),
      instruction_visitor_(graph, this),
      move_resolver_(graph->GetArena(), this),
      assembler_(graph->GetArena()),
      isa_features_(isa_features),
      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      constant_area_start_(-1),
      fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      method_address_offset_(std::less<uint32_t>(),
                             graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
  // Use a fake return address register to mimic Quick.
  AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}

void CodeGeneratorX86::SetupBlockedRegisters() const {
  // Stack register is always reserved.
  blocked_core_registers_[ESP] = true;
}

InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
    : InstructionCodeGenerator(graph, codegen),
      assembler_(codegen->GetAssembler()),
      codegen_(codegen) {}

static dwarf::Reg DWARFReg(Register reg) {
  return dwarf::Reg::X86Core(static_cast<int>(reg));
}

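// Frame layout: the return address was pushed by the caller's `call`, then any
// allocated callee-save core registers are pushed, and finally ESP is dropped by
// the remaining frame size; the current ArtMethod* is stored at [ESP] when
// needed. An explicit stack overflow check (a `testl` load at ESP minus the
// reserved guard area) is emitted unless the method is a leaf whose frame does
// not require one.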
void CodeGeneratorX86::GenerateFrameEntry() {
  __ cfi().SetCurrentCFAOffset(kX86WordSize);  // return address
  __ Bind(&frame_entry_label_);
  bool skip_overflow_check =
      IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
  DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());

  if (!skip_overflow_check) {
    __ testl(EAX, Address(ESP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86))));
    RecordPcInfo(nullptr, 0);
  }

  if (HasEmptyFrame()) {
    return;
  }

  for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
    Register reg = kCoreCalleeSaves[i];
    if (allocated_registers_.ContainsCoreRegister(reg)) {
      __ pushl(reg);
      __ cfi().AdjustCFAOffset(kX86WordSize);
      __ cfi().RelOffset(DWARFReg(reg), 0);
    }
  }

  if (GetGraph()->HasShouldDeoptimizeFlag()) {
    // Initialize should_deoptimize flag to 0.
    __ movl(Address(ESP, -kShouldDeoptimizeFlagSize), Immediate(0));
  }

  int adjust = GetFrameSize() - FrameEntrySpillSize();
  __ subl(ESP, Immediate(adjust));
  __ cfi().AdjustCFAOffset(adjust);
  // Save the current method if we need it. Note that we do not
  // do this in HCurrentMethod, as the instruction might have been removed
  // in the SSA graph.
  if (RequiresCurrentMethod()) {
    __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
  }
}
1102
GenerateFrameExit()1103 void CodeGeneratorX86::GenerateFrameExit() {
1104 __ cfi().RememberState();
1105 if (!HasEmptyFrame()) {
1106 int adjust = GetFrameSize() - FrameEntrySpillSize();
1107 __ addl(ESP, Immediate(adjust));
1108 __ cfi().AdjustCFAOffset(-adjust);
1109
1110 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1111 Register reg = kCoreCalleeSaves[i];
1112 if (allocated_registers_.ContainsCoreRegister(reg)) {
1113 __ popl(reg);
1114 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize));
1115 __ cfi().Restore(DWARFReg(reg));
1116 }
1117 }
1118 }
1119 __ ret();
1120 __ cfi().RestoreState();
1121 __ cfi().DefCFAOffset(GetFrameSize());
1122 }
1123
Bind(HBasicBlock * block)1124 void CodeGeneratorX86::Bind(HBasicBlock* block) {
1125 __ Bind(GetLabelOf(block));
1126 }
1127
GetReturnLocation(Primitive::Type type) const1128 Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(Primitive::Type type) const {
1129 switch (type) {
1130 case Primitive::kPrimBoolean:
1131 case Primitive::kPrimByte:
1132 case Primitive::kPrimChar:
1133 case Primitive::kPrimShort:
1134 case Primitive::kPrimInt:
1135 case Primitive::kPrimNot:
1136 return Location::RegisterLocation(EAX);
1137
1138 case Primitive::kPrimLong:
1139 return Location::RegisterPairLocation(EAX, EDX);
1140
1141 case Primitive::kPrimVoid:
1142 return Location::NoLocation();
1143
1144 case Primitive::kPrimDouble:
1145 case Primitive::kPrimFloat:
1146 return Location::FpuRegisterLocation(XMM0);
1147 }
1148
1149 UNREACHABLE();
1150 }
1151
GetMethodLocation() const1152 Location InvokeDexCallingConventionVisitorX86::GetMethodLocation() const {
1153 return Location::RegisterLocation(kMethodRegisterArgument);
1154 }
1155
GetNextLocation(Primitive::Type type)1156 Location InvokeDexCallingConventionVisitorX86::GetNextLocation(Primitive::Type type) {
1157 switch (type) {
1158 case Primitive::kPrimBoolean:
1159 case Primitive::kPrimByte:
1160 case Primitive::kPrimChar:
1161 case Primitive::kPrimShort:
1162 case Primitive::kPrimInt:
1163 case Primitive::kPrimNot: {
1164 uint32_t index = gp_index_++;
1165 stack_index_++;
1166 if (index < calling_convention.GetNumberOfRegisters()) {
1167 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
1168 } else {
1169 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1170 }
1171 }
1172
1173 case Primitive::kPrimLong: {
1174 uint32_t index = gp_index_;
1175 gp_index_ += 2;
1176 stack_index_ += 2;
1177 if (index + 1 < calling_convention.GetNumberOfRegisters()) {
1178 X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(
1179 calling_convention.GetRegisterPairAt(index));
1180 return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
1181 } else {
1182 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1183 }
1184 }
1185
1186 case Primitive::kPrimFloat: {
1187 uint32_t index = float_index_++;
1188 stack_index_++;
1189 if (index < calling_convention.GetNumberOfFpuRegisters()) {
1190 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1191 } else {
1192 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1193 }
1194 }
1195
1196 case Primitive::kPrimDouble: {
1197 uint32_t index = float_index_++;
1198 stack_index_ += 2;
1199 if (index < calling_convention.GetNumberOfFpuRegisters()) {
1200 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1201 } else {
1202 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1203 }
1204 }
1205
1206 case Primitive::kPrimVoid:
1207 LOG(FATAL) << "Unexpected parameter type " << type;
1208 break;
1209 }
1210 return Location::NoLocation();
1211 }
1212
Move32(Location destination,Location source)1213 void CodeGeneratorX86::Move32(Location destination, Location source) {
1214 if (source.Equals(destination)) {
1215 return;
1216 }
1217 if (destination.IsRegister()) {
1218 if (source.IsRegister()) {
1219 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
1220 } else if (source.IsFpuRegister()) {
1221 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
1222 } else {
1223 DCHECK(source.IsStackSlot());
1224 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
1225 }
1226 } else if (destination.IsFpuRegister()) {
1227 if (source.IsRegister()) {
1228 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
1229 } else if (source.IsFpuRegister()) {
1230 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1231 } else {
1232 DCHECK(source.IsStackSlot());
1233 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1234 }
1235 } else {
1236 DCHECK(destination.IsStackSlot()) << destination;
1237 if (source.IsRegister()) {
1238 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
1239 } else if (source.IsFpuRegister()) {
1240 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1241 } else if (source.IsConstant()) {
1242 HConstant* constant = source.GetConstant();
1243 int32_t value = GetInt32ValueOf(constant);
1244 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
1245 } else {
1246 DCHECK(source.IsStackSlot());
1247 __ pushl(Address(ESP, source.GetStackIndex()));
1248 __ popl(Address(ESP, destination.GetStackIndex()));
1249 }
1250 }
1251 }
1252
Move64(Location destination,Location source)1253 void CodeGeneratorX86::Move64(Location destination, Location source) {
1254 if (source.Equals(destination)) {
1255 return;
1256 }
1257 if (destination.IsRegisterPair()) {
1258 if (source.IsRegisterPair()) {
1259 EmitParallelMoves(
1260 Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
1261 Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
1262 Primitive::kPrimInt,
1263 Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
1264 Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
1265 Primitive::kPrimInt);
1266 } else if (source.IsFpuRegister()) {
1267 XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
1268 __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
1269 __ psrlq(src_reg, Immediate(32));
1270 __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
1271 } else {
1272 // No conflict possible, so just do the moves.
1273 DCHECK(source.IsDoubleStackSlot());
1274 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
1275 __ movl(destination.AsRegisterPairHigh<Register>(),
1276 Address(ESP, source.GetHighStackIndex(kX86WordSize)));
1277 }
1278 } else if (destination.IsFpuRegister()) {
1279 if (source.IsFpuRegister()) {
1280 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1281 } else if (source.IsDoubleStackSlot()) {
1282 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1283 } else if (source.IsRegisterPair()) {
1284 size_t elem_size = Primitive::ComponentSize(Primitive::kPrimInt);
1285 // Create stack space for 2 elements.
1286 __ subl(ESP, Immediate(2 * elem_size));
1287 __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>());
1288 __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>());
1289 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
1290 // And remove the temporary stack space we allocated.
1291 __ addl(ESP, Immediate(2 * elem_size));
1292 } else {
1293 LOG(FATAL) << "Unimplemented";
1294 }
1295 } else {
1296 DCHECK(destination.IsDoubleStackSlot()) << destination;
1297 if (source.IsRegisterPair()) {
1298 // No conflict possible, so just do the moves.
1299 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
1300 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1301 source.AsRegisterPairHigh<Register>());
1302 } else if (source.IsFpuRegister()) {
1303 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1304 } else if (source.IsConstant()) {
1305 HConstant* constant = source.GetConstant();
1306 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1307 int64_t value = GetInt64ValueOf(constant);
1308 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
1309 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1310 Immediate(High32Bits(value)));
1311 } else {
1312 DCHECK(source.IsDoubleStackSlot()) << source;
1313 EmitParallelMoves(
1314 Location::StackSlot(source.GetStackIndex()),
1315 Location::StackSlot(destination.GetStackIndex()),
1316 Primitive::kPrimInt,
1317 Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
1318 Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)),
1319 Primitive::kPrimInt);
1320 }
1321 }
1322 }
1323
MoveConstant(Location location,int32_t value)1324 void CodeGeneratorX86::MoveConstant(Location location, int32_t value) {
1325 DCHECK(location.IsRegister());
1326 __ movl(location.AsRegister<Register>(), Immediate(value));
1327 }
1328
MoveLocation(Location dst,Location src,Primitive::Type dst_type)1329 void CodeGeneratorX86::MoveLocation(Location dst, Location src, Primitive::Type dst_type) {
1330 HParallelMove move(GetGraph()->GetArena());
1331 if (dst_type == Primitive::kPrimLong && !src.IsConstant() && !src.IsFpuRegister()) {
1332 move.AddMove(src.ToLow(), dst.ToLow(), Primitive::kPrimInt, nullptr);
1333 move.AddMove(src.ToHigh(), dst.ToHigh(), Primitive::kPrimInt, nullptr);
1334 } else {
1335 move.AddMove(src, dst, dst_type, nullptr);
1336 }
1337 GetMoveResolver()->EmitNativeCode(&move);
1338 }
1339
AddLocationAsTemp(Location location,LocationSummary * locations)1340 void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) {
1341 if (location.IsRegister()) {
1342 locations->AddTemp(location);
1343 } else if (location.IsRegisterPair()) {
1344 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>()));
1345 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>()));
1346 } else {
1347 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1348 }
1349 }
1350
1351 void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1352 DCHECK(!successor->IsExitBlock());
1353
1354 HBasicBlock* block = got->GetBlock();
1355 HInstruction* previous = got->GetPrevious();
1356
1357 HLoopInformation* info = block->GetLoopInformation();
1358 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1359 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1360 return;
1361 }
1362
1363 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1364 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1365 }
1366 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1367 __ jmp(codegen_->GetLabelOf(successor));
1368 }
1369 }
1370
1371 void LocationsBuilderX86::VisitGoto(HGoto* got) {
1372 got->SetLocations(nullptr);
1373 }
1374
1375 void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) {
1376 HandleGoto(got, got->GetSuccessor());
1377 }
1378
1379 void LocationsBuilderX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1380 try_boundary->SetLocations(nullptr);
1381 }
1382
1383 void InstructionCodeGeneratorX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1384 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1385 if (!successor->IsExitBlock()) {
1386 HandleGoto(try_boundary, successor);
1387 }
1388 }
1389
1390 void LocationsBuilderX86::VisitExit(HExit* exit) {
1391 exit->SetLocations(nullptr);
1392 }
1393
1394 void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1395 }
1396
1397 template<class LabelType>
1398 void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
1399 LabelType* true_label,
1400 LabelType* false_label) {
1401 if (cond->IsFPConditionTrueIfNaN()) {
1402 __ j(kUnordered, true_label);
1403 } else if (cond->IsFPConditionFalseIfNaN()) {
1404 __ j(kUnordered, false_label);
1405 }
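// For example, a Java `!=` compare is true when either input is NaN, so the
// unordered result jumps to the true label; for `<`, `<=`, `>`, `>=` and `==`
// it jumps to the false label instead.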
1406 __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label);
1407 }
1408
1409 template<class LabelType>
1410 void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
1411 LabelType* true_label,
1412 LabelType* false_label) {
1413 LocationSummary* locations = cond->GetLocations();
1414 Location left = locations->InAt(0);
1415 Location right = locations->InAt(1);
1416 IfCondition if_cond = cond->GetCondition();
1417
1418 Register left_high = left.AsRegisterPairHigh<Register>();
1419 Register left_low = left.AsRegisterPairLow<Register>();
1420 IfCondition true_high_cond = if_cond;
1421 IfCondition false_high_cond = cond->GetOppositeCondition();
1422 Condition final_condition = X86UnsignedOrFPCondition(if_cond); // unsigned on lower part
1423
1424 // Set the conditions for the test, remembering that == needs to be
1425 // decided using the low words.
1426 switch (if_cond) {
1427 case kCondEQ:
1428 case kCondNE:
1429 // Nothing to do.
1430 break;
1431 case kCondLT:
1432 false_high_cond = kCondGT;
1433 break;
1434 case kCondLE:
1435 true_high_cond = kCondLT;
1436 break;
1437 case kCondGT:
1438 false_high_cond = kCondLT;
1439 break;
1440 case kCondGE:
1441 true_high_cond = kCondGT;
1442 break;
1443 case kCondB:
1444 false_high_cond = kCondA;
1445 break;
1446 case kCondBE:
1447 true_high_cond = kCondB;
1448 break;
1449 case kCondA:
1450 false_high_cond = kCondB;
1451 break;
1452 case kCondAE:
1453 true_high_cond = kCondA;
1454 break;
1455 }
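// For example, for a signed `<` (kCondLT) the emitted sequence is:
//   high words: jump to true_label on `<` (signed), to false_label on `>`;
//   otherwise the high words are equal and the low words decide, compared
//   unsigned (final_condition == kBelow).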
1456
1457 if (right.IsConstant()) {
1458 int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1459 int32_t val_high = High32Bits(value);
1460 int32_t val_low = Low32Bits(value);
1461
1462 codegen_->Compare32BitValue(left_high, val_high);
1463 if (if_cond == kCondNE) {
1464 __ j(X86Condition(true_high_cond), true_label);
1465 } else if (if_cond == kCondEQ) {
1466 __ j(X86Condition(false_high_cond), false_label);
1467 } else {
1468 __ j(X86Condition(true_high_cond), true_label);
1469 __ j(X86Condition(false_high_cond), false_label);
1470 }
1471 // Must be equal high, so compare the lows.
1472 codegen_->Compare32BitValue(left_low, val_low);
1473 } else if (right.IsRegisterPair()) {
1474 Register right_high = right.AsRegisterPairHigh<Register>();
1475 Register right_low = right.AsRegisterPairLow<Register>();
1476
1477 __ cmpl(left_high, right_high);
1478 if (if_cond == kCondNE) {
1479 __ j(X86Condition(true_high_cond), true_label);
1480 } else if (if_cond == kCondEQ) {
1481 __ j(X86Condition(false_high_cond), false_label);
1482 } else {
1483 __ j(X86Condition(true_high_cond), true_label);
1484 __ j(X86Condition(false_high_cond), false_label);
1485 }
1486 // Must be equal high, so compare the lows.
1487 __ cmpl(left_low, right_low);
1488 } else {
1489 DCHECK(right.IsDoubleStackSlot());
1490 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
1491 if (if_cond == kCondNE) {
1492 __ j(X86Condition(true_high_cond), true_label);
1493 } else if (if_cond == kCondEQ) {
1494 __ j(X86Condition(false_high_cond), false_label);
1495 } else {
1496 __ j(X86Condition(true_high_cond), true_label);
1497 __ j(X86Condition(false_high_cond), false_label);
1498 }
1499 // Must be equal high, so compare the lows.
1500 __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
1501 }
1502 // The last comparison might be unsigned.
1503 __ j(final_condition, true_label);
1504 }
1505
1506 void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
1507 Location rhs,
1508 HInstruction* insn,
1509 bool is_double) {
1510 HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable();
1511 if (is_double) {
1512 if (rhs.IsFpuRegister()) {
1513 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1514 } else if (const_area != nullptr) {
1515 DCHECK(const_area->IsEmittedAtUseSite());
1516 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(),
1517 codegen_->LiteralDoubleAddress(
1518 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
1519 const_area->GetBaseMethodAddress(),
1520 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1521 } else {
1522 DCHECK(rhs.IsDoubleStackSlot());
1523 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1524 }
1525 } else {
1526 if (rhs.IsFpuRegister()) {
1527 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1528 } else if (const_area != nullptr) {
1529 DCHECK(const_area->IsEmittedAtUseSite());
1530 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(),
1531 codegen_->LiteralFloatAddress(
1532 const_area->GetConstant()->AsFloatConstant()->GetValue(),
1533 const_area->GetBaseMethodAddress(),
1534 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1535 } else {
1536 DCHECK(rhs.IsStackSlot());
1537 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1538 }
1539 }
1540 }
1541
1542 template<class LabelType>
1543 void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
1544 LabelType* true_target_in,
1545 LabelType* false_target_in) {
1546 // Generated branching requires both targets to be explicit. If either of the
1547 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1548 LabelType fallthrough_target;
1549 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1550 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1551
1552 LocationSummary* locations = condition->GetLocations();
1553 Location left = locations->InAt(0);
1554 Location right = locations->InAt(1);
1555
1556 Primitive::Type type = condition->InputAt(0)->GetType();
1557 switch (type) {
1558 case Primitive::kPrimLong:
1559 GenerateLongComparesAndJumps(condition, true_target, false_target);
1560 break;
1561 case Primitive::kPrimFloat:
1562 GenerateFPCompare(left, right, condition, false);
1563 GenerateFPJumps(condition, true_target, false_target);
1564 break;
1565 case Primitive::kPrimDouble:
1566 GenerateFPCompare(left, right, condition, true);
1567 GenerateFPJumps(condition, true_target, false_target);
1568 break;
1569 default:
1570 LOG(FATAL) << "Unexpected compare type " << type;
1571 }
1572
1573 if (false_target != &fallthrough_target) {
1574 __ jmp(false_target);
1575 }
1576
1577 if (fallthrough_target.IsLinked()) {
1578 __ Bind(&fallthrough_target);
1579 }
1580 }
1581
1582 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1583 // Moves may affect the eflags register (moving zero uses xorl), so the EFLAGS
1584 // can only be trusted if `cond` is the instruction immediately preceding `branch`.
1585 // We can't use the eflags for long/FP conditions if they are materialized, due to their complex branching.
1586 return cond->IsCondition() &&
1587 cond->GetNext() == branch &&
1588 cond->InputAt(0)->GetType() != Primitive::kPrimLong &&
1589 !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType());
1590 }
1591
1592 template<class LabelType>
1593 void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
1594 size_t condition_input_index,
1595 LabelType* true_target,
1596 LabelType* false_target) {
1597 HInstruction* cond = instruction->InputAt(condition_input_index);
1598
1599 if (true_target == nullptr && false_target == nullptr) {
1600 // Nothing to do. The code always falls through.
1601 return;
1602 } else if (cond->IsIntConstant()) {
1603 // Constant condition, statically compared against "true" (integer value 1).
1604 if (cond->AsIntConstant()->IsTrue()) {
1605 if (true_target != nullptr) {
1606 __ jmp(true_target);
1607 }
1608 } else {
1609 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1610 if (false_target != nullptr) {
1611 __ jmp(false_target);
1612 }
1613 }
1614 return;
1615 }
1616
1617 // The following code generates these patterns:
1618 // (1) true_target == nullptr && false_target != nullptr
1619 // - opposite condition true => branch to false_target
1620 // (2) true_target != nullptr && false_target == nullptr
1621 // - condition true => branch to true_target
1622 // (3) true_target != nullptr && false_target != nullptr
1623 // - condition true => branch to true_target
1624 // - branch to false_target
1625 if (IsBooleanValueOrMaterializedCondition(cond)) {
1626 if (AreEflagsSetFrom(cond, instruction)) {
1627 if (true_target == nullptr) {
1628 __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target);
1629 } else {
1630 __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
1631 }
1632 } else {
1633 // Materialized condition, compare against 0.
1634 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1635 if (lhs.IsRegister()) {
1636 __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
1637 } else {
1638 __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
1639 }
1640 if (true_target == nullptr) {
1641 __ j(kEqual, false_target);
1642 } else {
1643 __ j(kNotEqual, true_target);
1644 }
1645 }
1646 } else {
1647 // Condition has not been materialized, use its inputs as the comparison and
1648 // its condition as the branch condition.
1649 HCondition* condition = cond->AsCondition();
1650
1651 // If this is a long or FP comparison that has been folded into
1652 // the HCondition, generate the comparison directly.
1653 Primitive::Type type = condition->InputAt(0)->GetType();
1654 if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
1655 GenerateCompareTestAndBranch(condition, true_target, false_target);
1656 return;
1657 }
1658
1659 Location lhs = condition->GetLocations()->InAt(0);
1660 Location rhs = condition->GetLocations()->InAt(1);
1661 // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
1662 codegen_->GenerateIntCompare(lhs, rhs);
1663 if (true_target == nullptr) {
1664 __ j(X86Condition(condition->GetOppositeCondition()), false_target);
1665 } else {
1666 __ j(X86Condition(condition->GetCondition()), true_target);
1667 }
1668 }
1669
1670 // If neither branch falls through (case 3), the conditional branch to `true_target`
1671 // was already emitted (case 2) and we need to emit a jump to `false_target`.
1672 if (true_target != nullptr && false_target != nullptr) {
1673 __ jmp(false_target);
1674 }
1675 }
1676
1677 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
1678 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
1679 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1680 locations->SetInAt(0, Location::Any());
1681 }
1682 }
1683
1684 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
1685 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1686 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1687 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1688 nullptr : codegen_->GetLabelOf(true_successor);
1689 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1690 nullptr : codegen_->GetLabelOf(false_successor);
1691 GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
1692 }
1693
1694 void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
1695 LocationSummary* locations = new (GetGraph()->GetArena())
1696 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1697 InvokeRuntimeCallingConvention calling_convention;
1698 RegisterSet caller_saves = RegisterSet::Empty();
1699 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1700 locations->SetCustomSlowPathCallerSaves(caller_saves);
1701 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1702 locations->SetInAt(0, Location::Any());
1703 }
1704 }
1705
1706 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
1707 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
1708 GenerateTestAndBranch<Label>(deoptimize,
1709 /* condition_input_index */ 0,
1710 slow_path->GetEntryLabel(),
1711 /* false_target */ nullptr);
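  // If the condition holds, control branches to the deoptimization slow path;
  // otherwise it falls through and execution continues normally.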
1712 }
1713
1714 void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1715 LocationSummary* locations = new (GetGraph()->GetArena())
1716 LocationSummary(flag, LocationSummary::kNoCall);
1717 locations->SetOut(Location::RequiresRegister());
1718 }
1719
1720 void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1721 __ movl(flag->GetLocations()->Out().AsRegister<Register>(),
1722 Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
1723 }
1724
1725 static bool SelectCanUseCMOV(HSelect* select) {
1726 // There are no conditional move instructions for XMMs.
1727 if (Primitive::IsFloatingPointType(select->GetType())) {
1728 return false;
1729 }
1730
1731 // A FP condition doesn't generate the single CC that we need.
1732 // In 32 bit mode, a long condition doesn't generate a single CC either.
1733 HInstruction* condition = select->GetCondition();
1734 if (condition->IsCondition()) {
1735 Primitive::Type compare_type = condition->InputAt(0)->GetType();
1736 if (compare_type == Primitive::kPrimLong ||
1737 Primitive::IsFloatingPointType(compare_type)) {
1738 return false;
1739 }
1740 }
1741
1742 // We can generate a CMOV for this Select.
1743 return true;
1744 }
1745
1746 void LocationsBuilderX86::VisitSelect(HSelect* select) {
1747 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
1748 if (Primitive::IsFloatingPointType(select->GetType())) {
1749 locations->SetInAt(0, Location::RequiresFpuRegister());
1750 locations->SetInAt(1, Location::Any());
1751 } else {
1752 locations->SetInAt(0, Location::RequiresRegister());
1753 if (SelectCanUseCMOV(select)) {
1754 if (select->InputAt(1)->IsConstant()) {
1755 // Cmov can't handle a constant value.
1756 locations->SetInAt(1, Location::RequiresRegister());
1757 } else {
1758 locations->SetInAt(1, Location::Any());
1759 }
1760 } else {
1761 locations->SetInAt(1, Location::Any());
1762 }
1763 }
1764 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1765 locations->SetInAt(2, Location::RequiresRegister());
1766 }
1767 locations->SetOut(Location::SameAsFirstInput());
1768 }
1769
1770 void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
1771 LocationSummary* locations = select->GetLocations();
1772 DCHECK(locations->InAt(0).Equals(locations->Out()));
1773 if (SelectCanUseCMOV(select)) {
1774 // If both the condition and the source types are integer, we can generate
1775 // a CMOV to implement Select.
1776
1777 HInstruction* select_condition = select->GetCondition();
1778 Condition cond = kNotEqual;
1779
1780 // Figure out how to test the 'condition'.
1781 if (select_condition->IsCondition()) {
1782 HCondition* condition = select_condition->AsCondition();
1783 if (!condition->IsEmittedAtUseSite()) {
1784 // This was a previously materialized condition.
1785 // Can we use the existing condition code?
1786 if (AreEflagsSetFrom(condition, select)) {
1787 // Materialization was the previous instruction. Condition codes are right.
1788 cond = X86Condition(condition->GetCondition());
1789 } else {
1790 // No, we have to recreate the condition code.
1791 Register cond_reg = locations->InAt(2).AsRegister<Register>();
1792 __ testl(cond_reg, cond_reg);
1793 }
1794 } else {
1795 // We can't handle FP or long here.
1796 DCHECK_NE(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
1797 DCHECK(!Primitive::IsFloatingPointType(condition->InputAt(0)->GetType()));
1798 LocationSummary* cond_locations = condition->GetLocations();
1799 codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
1800 cond = X86Condition(condition->GetCondition());
1801 }
1802 } else {
1803 // Must be a Boolean condition, which needs to be compared to 0.
1804 Register cond_reg = locations->InAt(2).AsRegister<Register>();
1805 __ testl(cond_reg, cond_reg);
1806 }
1807
1808 // If the condition is true, overwrite the output, which already contains false.
1809 Location false_loc = locations->InAt(0);
1810 Location true_loc = locations->InAt(1);
1811 if (select->GetType() == Primitive::kPrimLong) {
1812 // 64 bit conditional move.
1813 Register false_high = false_loc.AsRegisterPairHigh<Register>();
1814 Register false_low = false_loc.AsRegisterPairLow<Register>();
1815 if (true_loc.IsRegisterPair()) {
1816 __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
1817 __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
1818 } else {
1819 __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
1820 __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
1821 }
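      // CMOV does not modify EFLAGS, so both halves of the pair are moved
      // (or both left untouched) under the same condition.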
1822 } else {
1823 // 32 bit conditional move.
1824 Register false_reg = false_loc.AsRegister<Register>();
1825 if (true_loc.IsRegister()) {
1826 __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
1827 } else {
1828 __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
1829 }
1830 }
1831 } else {
1832 NearLabel false_target;
1833 GenerateTestAndBranch<NearLabel>(
1834 select, /* condition_input_index */ 2, /* true_target */ nullptr, &false_target);
1835 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1836 __ Bind(&false_target);
1837 }
1838 }
1839
1840 void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
1841 new (GetGraph()->GetArena()) LocationSummary(info);
1842 }
1843
1844 void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo*) {
1845 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
1846 }
1847
1848 void CodeGeneratorX86::GenerateNop() {
1849 __ nop();
1850 }
1851
1852 void LocationsBuilderX86::HandleCondition(HCondition* cond) {
1853 LocationSummary* locations =
1854 new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
1855 // Handle the long/FP comparisons made in instruction simplification.
1856 switch (cond->InputAt(0)->GetType()) {
1857 case Primitive::kPrimLong: {
1858 locations->SetInAt(0, Location::RequiresRegister());
1859 locations->SetInAt(1, Location::Any());
1860 if (!cond->IsEmittedAtUseSite()) {
1861 locations->SetOut(Location::RequiresRegister());
1862 }
1863 break;
1864 }
1865 case Primitive::kPrimFloat:
1866 case Primitive::kPrimDouble: {
1867 locations->SetInAt(0, Location::RequiresFpuRegister());
1868 if (cond->InputAt(1)->IsX86LoadFromConstantTable()) {
1869 DCHECK(cond->InputAt(1)->IsEmittedAtUseSite());
1870 } else if (cond->InputAt(1)->IsConstant()) {
1871 locations->SetInAt(1, Location::RequiresFpuRegister());
1872 } else {
1873 locations->SetInAt(1, Location::Any());
1874 }
1875 if (!cond->IsEmittedAtUseSite()) {
1876 locations->SetOut(Location::RequiresRegister());
1877 }
1878 break;
1879 }
1880 default:
1881 locations->SetInAt(0, Location::RequiresRegister());
1882 locations->SetInAt(1, Location::Any());
1883 if (!cond->IsEmittedAtUseSite()) {
1884 // We need a byte register.
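      // (setcc writes an 8-bit register, and on x86-32 only EAX, EBX, ECX and
      //  EDX have byte-addressable low halves.)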
1885 locations->SetOut(Location::RegisterLocation(ECX));
1886 }
1887 break;
1888 }
1889 }
1890
1891 void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
1892 if (cond->IsEmittedAtUseSite()) {
1893 return;
1894 }
1895
1896 LocationSummary* locations = cond->GetLocations();
1897 Location lhs = locations->InAt(0);
1898 Location rhs = locations->InAt(1);
1899 Register reg = locations->Out().AsRegister<Register>();
1900 NearLabel true_label, false_label;
1901
1902 switch (cond->InputAt(0)->GetType()) {
1903 default: {
1904 // Integer case.
1905
1906 // Clear output register: setb only sets the low byte.
1907 __ xorl(reg, reg);
1908 codegen_->GenerateIntCompare(lhs, rhs);
1909 __ setb(X86Condition(cond->GetCondition()), reg);
1910 return;
1911 }
1912 case Primitive::kPrimLong:
1913 GenerateLongComparesAndJumps(cond, &true_label, &false_label);
1914 break;
1915 case Primitive::kPrimFloat:
1916 GenerateFPCompare(lhs, rhs, cond, false);
1917 GenerateFPJumps(cond, &true_label, &false_label);
1918 break;
1919 case Primitive::kPrimDouble:
1920 GenerateFPCompare(lhs, rhs, cond, true);
1921 GenerateFPJumps(cond, &true_label, &false_label);
1922 break;
1923 }
1924
1925 // Convert the jumps into the result.
1926 NearLabel done_label;
1927
1928 // False case: result = 0.
1929 __ Bind(&false_label);
1930 __ xorl(reg, reg);
1931 __ jmp(&done_label);
1932
1933 // True case: result = 1.
1934 __ Bind(&true_label);
1935 __ movl(reg, Immediate(1));
1936 __ Bind(&done_label);
1937 }
1938
1939 void LocationsBuilderX86::VisitEqual(HEqual* comp) {
1940 HandleCondition(comp);
1941 }
1942
1943 void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
1944 HandleCondition(comp);
1945 }
1946
1947 void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
1948 HandleCondition(comp);
1949 }
1950
1951 void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
1952 HandleCondition(comp);
1953 }
1954
1955 void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
1956 HandleCondition(comp);
1957 }
1958
1959 void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
1960 HandleCondition(comp);
1961 }
1962
1963 void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1964 HandleCondition(comp);
1965 }
1966
1967 void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1968 HandleCondition(comp);
1969 }
1970
1971 void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
1972 HandleCondition(comp);
1973 }
1974
1975 void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
1976 HandleCondition(comp);
1977 }
1978
1979 void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
1980 HandleCondition(comp);
1981 }
1982
1983 void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
1984 HandleCondition(comp);
1985 }
1986
1987 void LocationsBuilderX86::VisitBelow(HBelow* comp) {
1988 HandleCondition(comp);
1989 }
1990
1991 void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
1992 HandleCondition(comp);
1993 }
1994
1995 void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
1996 HandleCondition(comp);
1997 }
1998
1999 void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2000 HandleCondition(comp);
2001 }
2002
2003 void LocationsBuilderX86::VisitAbove(HAbove* comp) {
2004 HandleCondition(comp);
2005 }
2006
2007 void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
2008 HandleCondition(comp);
2009 }
2010
2011 void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2012 HandleCondition(comp);
2013 }
2014
2015 void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2016 HandleCondition(comp);
2017 }
2018
2019 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
2020 LocationSummary* locations =
2021 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
2022 locations->SetOut(Location::ConstantLocation(constant));
2023 }
2024
2025 void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2026 // Will be generated at use site.
2027 }
2028
2029 void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
2030 LocationSummary* locations =
2031 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
2032 locations->SetOut(Location::ConstantLocation(constant));
2033 }
2034
2035 void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2036 // Will be generated at use site.
2037 }
2038
2039 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
2040 LocationSummary* locations =
2041 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
2042 locations->SetOut(Location::ConstantLocation(constant));
2043 }
2044
2045 void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2046 // Will be generated at use site.
2047 }
2048
2049 void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
2050 LocationSummary* locations =
2051 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
2052 locations->SetOut(Location::ConstantLocation(constant));
2053 }
2054
2055 void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2056 // Will be generated at use site.
2057 }
2058
2059 void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
2060 LocationSummary* locations =
2061 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
2062 locations->SetOut(Location::ConstantLocation(constant));
2063 }
2064
2065 void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2066 // Will be generated at use site.
2067 }
2068
2069 void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2070 memory_barrier->SetLocations(nullptr);
2071 }
2072
2073 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2074 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2075 }
2076
2077 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
2078 ret->SetLocations(nullptr);
2079 }
2080
2081 void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2082 codegen_->GenerateFrameExit();
2083 }
2084
2085 void LocationsBuilderX86::VisitReturn(HReturn* ret) {
2086 LocationSummary* locations =
2087 new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall);
2088 switch (ret->InputAt(0)->GetType()) {
2089 case Primitive::kPrimBoolean:
2090 case Primitive::kPrimByte:
2091 case Primitive::kPrimChar:
2092 case Primitive::kPrimShort:
2093 case Primitive::kPrimInt:
2094 case Primitive::kPrimNot:
2095 locations->SetInAt(0, Location::RegisterLocation(EAX));
2096 break;
2097
2098 case Primitive::kPrimLong:
2099 locations->SetInAt(
2100 0, Location::RegisterPairLocation(EAX, EDX));
2101 break;
2102
2103 case Primitive::kPrimFloat:
2104 case Primitive::kPrimDouble:
2105 locations->SetInAt(
2106 0, Location::FpuRegisterLocation(XMM0));
2107 break;
2108
2109 default:
2110 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2111 }
2112 }
2113
2114 void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
2115 if (kIsDebugBuild) {
2116 switch (ret->InputAt(0)->GetType()) {
2117 case Primitive::kPrimBoolean:
2118 case Primitive::kPrimByte:
2119 case Primitive::kPrimChar:
2120 case Primitive::kPrimShort:
2121 case Primitive::kPrimInt:
2122 case Primitive::kPrimNot:
2123 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX);
2124 break;
2125
2126 case Primitive::kPrimLong:
2127 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairLow<Register>(), EAX);
2128 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairHigh<Register>(), EDX);
2129 break;
2130
2131 case Primitive::kPrimFloat:
2132 case Primitive::kPrimDouble:
2133 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2134 break;
2135
2136 default:
2137 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2138 }
2139 }
2140 codegen_->GenerateFrameExit();
2141 }
2142
2143 void LocationsBuilderX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2144 // The trampoline uses the same calling convention as a regular dex call,
2145 // except that the first argument register holds the method_idx instead of
2146 // the target Method*.
2147 HandleInvoke(invoke);
2148 }
2149
2150 void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2151 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2152 }
2153
2154 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2155 // Explicit clinit checks triggered by static invokes must have been pruned by
2156 // art::PrepareForRegisterAllocation.
2157 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2158
2159 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2160 if (intrinsic.TryDispatch(invoke)) {
2161 if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
2162 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
2163 }
2164 return;
2165 }
2166
2167 HandleInvoke(invoke);
2168
2169 // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
2170 if (invoke->HasPcRelativeDexCache()) {
2171 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2172 }
2173 }
2174
2175 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
2176 if (invoke->GetLocations()->Intrinsified()) {
2177 IntrinsicCodeGeneratorX86 intrinsic(codegen);
2178 intrinsic.Dispatch(invoke);
2179 return true;
2180 }
2181 return false;
2182 }
2183
2184 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2185 // Explicit clinit checks triggered by static invokes must have been pruned by
2186 // art::PrepareForRegisterAllocation.
2187 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2188
2189 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2190 return;
2191 }
2192
2193 LocationSummary* locations = invoke->GetLocations();
2194 codegen_->GenerateStaticOrDirectCall(
2195 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2196 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2197 }
2198
2199 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2200 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2201 if (intrinsic.TryDispatch(invoke)) {
2202 return;
2203 }
2204
2205 HandleInvoke(invoke);
2206 }
2207
2208 void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
2209 InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
2210 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2211 }
2212
2213 void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2214 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2215 return;
2216 }
2217
2218 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2219 DCHECK(!codegen_->IsLeafMethod());
2220 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2221 }
2222
2223 void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2224 // This call to HandleInvoke allocates a temporary (core) register
2225 // which is also used to stage the hidden argument before it is
2226 // transferred into the FP register (XMM7).
2227 HandleInvoke(invoke);
2228 // Add the hidden argument.
2229 invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
2230 }
2231
2232 void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2233 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2234 LocationSummary* locations = invoke->GetLocations();
2235 Register temp = locations->GetTemp(0).AsRegister<Register>();
2236 XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2237 Location receiver = locations->InAt(0);
2238 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2239
2240 // Set the hidden argument. It is safe to do this here, as XMM7
2241 // won't be modified thereafter, before the `call` instruction.
2242 DCHECK_EQ(XMM7, hidden_reg);
2243 __ movl(temp, Immediate(invoke->GetDexMethodIndex()));
2244 __ movd(hidden_reg, temp);
2245
2246 if (receiver.IsStackSlot()) {
2247 __ movl(temp, Address(ESP, receiver.GetStackIndex()));
2248 // /* HeapReference<Class> */ temp = temp->klass_
2249 __ movl(temp, Address(temp, class_offset));
2250 } else {
2251 // /* HeapReference<Class> */ temp = receiver->klass_
2252 __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
2253 }
2254 codegen_->MaybeRecordImplicitNullCheck(invoke);
2255 // Instead of simply (possibly) unpoisoning `temp` here, we should
2256 // emit a read barrier for the previous class reference load.
2257 // However this is not required in practice, as this is an
2258 // intermediate/temporary reference and because the current
2259 // concurrent copying collector keeps the from-space memory
2260 // intact/accessible until the end of the marking phase (this
2261 // guarantee may not hold for future collectors).
2262 __ MaybeUnpoisonHeapReference(temp);
2263 // temp = temp->GetAddressOfIMT()
2264 __ movl(temp,
2265 Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
2266 // temp = temp->GetImtEntryAt(method_offset);
2267 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2268 invoke->GetImtIndex(), kX86PointerSize));
2269 __ movl(temp, Address(temp, method_offset));
2270 // call temp->GetEntryPoint();
2271 __ call(Address(temp,
2272 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
2273
2274 DCHECK(!codegen_->IsLeafMethod());
2275 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2276 }
2277
2278 void LocationsBuilderX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2279 HandleInvoke(invoke);
2280 }
2281
2282 void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2283 codegen_->GenerateInvokePolymorphicCall(invoke);
2284 }
2285
2286 void LocationsBuilderX86::VisitNeg(HNeg* neg) {
2287 LocationSummary* locations =
2288 new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
2289 switch (neg->GetResultType()) {
2290 case Primitive::kPrimInt:
2291 case Primitive::kPrimLong:
2292 locations->SetInAt(0, Location::RequiresRegister());
2293 locations->SetOut(Location::SameAsFirstInput());
2294 break;
2295
2296 case Primitive::kPrimFloat:
2297 locations->SetInAt(0, Location::RequiresFpuRegister());
2298 locations->SetOut(Location::SameAsFirstInput());
2299 locations->AddTemp(Location::RequiresRegister());
2300 locations->AddTemp(Location::RequiresFpuRegister());
2301 break;
2302
2303 case Primitive::kPrimDouble:
2304 locations->SetInAt(0, Location::RequiresFpuRegister());
2305 locations->SetOut(Location::SameAsFirstInput());
2306 locations->AddTemp(Location::RequiresFpuRegister());
2307 break;
2308
2309 default:
2310 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2311 }
2312 }
2313
2314 void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) {
2315 LocationSummary* locations = neg->GetLocations();
2316 Location out = locations->Out();
2317 Location in = locations->InAt(0);
2318 switch (neg->GetResultType()) {
2319 case Primitive::kPrimInt:
2320 DCHECK(in.IsRegister());
2321 DCHECK(in.Equals(out));
2322 __ negl(out.AsRegister<Register>());
2323 break;
2324
2325 case Primitive::kPrimLong:
2326 DCHECK(in.IsRegisterPair());
2327 DCHECK(in.Equals(out));
2328 __ negl(out.AsRegisterPairLow<Register>());
2329 // Negation is similar to subtraction from zero. The least
2330 // significant 32-bit half triggers a borrow when it is different from
2331 // zero; to take it into account, add 1 to the most significant
2332 // 32-bit half if the carry flag (CF) is set to 1 after the first NEGL
2333 // operation.
2334 __ adcl(out.AsRegisterPairHigh<Register>(), Immediate(0));
2335 __ negl(out.AsRegisterPairHigh<Register>());
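      // For example, negating low = 0x00000000, high = 0x00000001 (the value 2^32):
      //   negl low   -> low  = 0x00000000, CF = 0
      //   adcl high  -> high = 0x00000001
      //   negl high  -> high = 0xFFFFFFFF, i.e. the 64-bit result is -2^32.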
2336 break;
2337
2338 case Primitive::kPrimFloat: {
2339 DCHECK(in.Equals(out));
2340 Register constant = locations->GetTemp(0).AsRegister<Register>();
2341 XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2342 // Implement float negation with an exclusive or with value
2343 // 0x80000000 (mask for bit 31, representing the sign of a
2344 // single-precision floating-point number).
2345 __ movl(constant, Immediate(INT32_C(0x80000000)));
2346 __ movd(mask, constant);
2347 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2348 break;
2349 }
2350
2351 case Primitive::kPrimDouble: {
2352 DCHECK(in.Equals(out));
2353 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2354 // Implement double negation with an exclusive or with value
2355 // 0x8000000000000000 (mask for bit 63, representing the sign of
2356 // a double-precision floating-point number).
2357 __ LoadLongConstant(mask, INT64_C(0x8000000000000000));
2358 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2359 break;
2360 }
2361
2362 default:
2363 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2364 }
2365 }
2366
2367 void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) {
2368 LocationSummary* locations =
2369 new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
2370 DCHECK(Primitive::IsFloatingPointType(neg->GetType()));
2371 locations->SetInAt(0, Location::RequiresFpuRegister());
2372 locations->SetInAt(1, Location::RequiresRegister());
2373 locations->SetOut(Location::SameAsFirstInput());
2374 locations->AddTemp(Location::RequiresFpuRegister());
2375 }
2376
2377 void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
2378 LocationSummary* locations = neg->GetLocations();
2379 Location out = locations->Out();
2380 DCHECK(locations->InAt(0).Equals(out));
2381
2382 Register constant_area = locations->InAt(1).AsRegister<Register>();
2383 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2384 if (neg->GetType() == Primitive::kPrimFloat) {
2385 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000),
2386 neg->GetBaseMethodAddress(),
2387 constant_area));
2388 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2389 } else {
2390 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
2391 neg->GetBaseMethodAddress(),
2392 constant_area));
2393 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2394 }
2395 }
2396
2397 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
2398 Primitive::Type result_type = conversion->GetResultType();
2399 Primitive::Type input_type = conversion->GetInputType();
2400 DCHECK_NE(result_type, input_type);
2401
2402 // The float-to-long and double-to-long type conversions rely on a
2403 // call to the runtime.
2404 LocationSummary::CallKind call_kind =
2405 ((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble)
2406 && result_type == Primitive::kPrimLong)
2407 ? LocationSummary::kCallOnMainOnly
2408 : LocationSummary::kNoCall;
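  // (On 32-bit x86 there is no SSE instruction that truncates a float/double
  //  straight to a 64-bit integer, and the Java NaN/overflow rules are
  //  simplest to honor in the kQuickF2l/kQuickD2l runtime helpers.)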
2409 LocationSummary* locations =
2410 new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
2411
2412 // The Java language does not allow treating boolean as an integral type but
2413 // our bit representation makes it safe.
2414
2415 switch (result_type) {
2416 case Primitive::kPrimByte:
2417 switch (input_type) {
2418 case Primitive::kPrimLong: {
2419 // Type conversion from long to byte is a result of code transformations.
2420 HInstruction* input = conversion->InputAt(0);
2421 Location input_location = input->IsConstant()
2422 ? Location::ConstantLocation(input->AsConstant())
2423 : Location::RegisterPairLocation(EAX, EDX);
2424 locations->SetInAt(0, input_location);
2425 // Make the output overlap to please the register allocator. This greatly simplifies
2426 // the validation of the linear scan implementation.
2427 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2428 break;
2429 }
2430 case Primitive::kPrimBoolean:
2431 // Boolean input is a result of code transformations.
2432 case Primitive::kPrimShort:
2433 case Primitive::kPrimInt:
2434 case Primitive::kPrimChar:
2435 // Processing a Dex `int-to-byte' instruction.
2436 locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0)));
2437 // Make the output overlap to please the register allocator. This greatly simplifies
2438 // the validation of the linear scan implementation.
2439 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2440 break;
2441
2442 default:
2443 LOG(FATAL) << "Unexpected type conversion from " << input_type
2444 << " to " << result_type;
2445 }
2446 break;
2447
2448 case Primitive::kPrimShort:
2449 switch (input_type) {
2450 case Primitive::kPrimLong:
2451 // Type conversion from long to short is a result of code transformations.
2452 case Primitive::kPrimBoolean:
2453 // Boolean input is a result of code transformations.
2454 case Primitive::kPrimByte:
2455 case Primitive::kPrimInt:
2456 case Primitive::kPrimChar:
2457 // Processing a Dex `int-to-short' instruction.
2458 locations->SetInAt(0, Location::Any());
2459 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2460 break;
2461
2462 default:
2463 LOG(FATAL) << "Unexpected type conversion from " << input_type
2464 << " to " << result_type;
2465 }
2466 break;
2467
2468 case Primitive::kPrimInt:
2469 switch (input_type) {
2470 case Primitive::kPrimLong:
2471 // Processing a Dex `long-to-int' instruction.
2472 locations->SetInAt(0, Location::Any());
2473 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2474 break;
2475
2476 case Primitive::kPrimFloat:
2477 // Processing a Dex `float-to-int' instruction.
2478 locations->SetInAt(0, Location::RequiresFpuRegister());
2479 locations->SetOut(Location::RequiresRegister());
2480 locations->AddTemp(Location::RequiresFpuRegister());
2481 break;
2482
2483 case Primitive::kPrimDouble:
2484 // Processing a Dex `double-to-int' instruction.
2485 locations->SetInAt(0, Location::RequiresFpuRegister());
2486 locations->SetOut(Location::RequiresRegister());
2487 locations->AddTemp(Location::RequiresFpuRegister());
2488 break;
2489
2490 default:
2491 LOG(FATAL) << "Unexpected type conversion from " << input_type
2492 << " to " << result_type;
2493 }
2494 break;
2495
2496 case Primitive::kPrimLong:
2497 switch (input_type) {
2498 case Primitive::kPrimBoolean:
2499 // Boolean input is a result of code transformations.
2500 case Primitive::kPrimByte:
2501 case Primitive::kPrimShort:
2502 case Primitive::kPrimInt:
2503 case Primitive::kPrimChar:
2504 // Processing a Dex `int-to-long' instruction.
2505 locations->SetInAt(0, Location::RegisterLocation(EAX));
2506 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2507 break;
2508
2509 case Primitive::kPrimFloat:
2510 case Primitive::kPrimDouble: {
2511 // Processing a Dex `float-to-long' or 'double-to-long' instruction.
2512 InvokeRuntimeCallingConvention calling_convention;
2513 XmmRegister parameter = calling_convention.GetFpuRegisterAt(0);
2514 locations->SetInAt(0, Location::FpuRegisterLocation(parameter));
2515
2516 // The runtime helper puts the result in EAX, EDX.
2517 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2518 }
2519 break;
2520
2521 default:
2522 LOG(FATAL) << "Unexpected type conversion from " << input_type
2523 << " to " << result_type;
2524 }
2525 break;
2526
2527 case Primitive::kPrimChar:
2528 switch (input_type) {
2529 case Primitive::kPrimLong:
2530 // Type conversion from long to char is a result of code transformations.
2531 case Primitive::kPrimBoolean:
2532 // Boolean input is a result of code transformations.
2533 case Primitive::kPrimByte:
2534 case Primitive::kPrimShort:
2535 case Primitive::kPrimInt:
2536 // Processing a Dex `int-to-char' instruction.
2537 locations->SetInAt(0, Location::Any());
2538 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2539 break;
2540
2541 default:
2542 LOG(FATAL) << "Unexpected type conversion from " << input_type
2543 << " to " << result_type;
2544 }
2545 break;
2546
2547 case Primitive::kPrimFloat:
2548 switch (input_type) {
2549 case Primitive::kPrimBoolean:
2550 // Boolean input is a result of code transformations.
2551 case Primitive::kPrimByte:
2552 case Primitive::kPrimShort:
2553 case Primitive::kPrimInt:
2554 case Primitive::kPrimChar:
2555 // Processing a Dex `int-to-float' instruction.
2556 locations->SetInAt(0, Location::RequiresRegister());
2557 locations->SetOut(Location::RequiresFpuRegister());
2558 break;
2559
2560 case Primitive::kPrimLong:
2561 // Processing a Dex `long-to-float' instruction.
2562 locations->SetInAt(0, Location::Any());
2563 locations->SetOut(Location::Any());
2564 break;
2565
2566 case Primitive::kPrimDouble:
2567 // Processing a Dex `double-to-float' instruction.
2568 locations->SetInAt(0, Location::RequiresFpuRegister());
2569 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2570 break;
2571
2572 default:
2573 LOG(FATAL) << "Unexpected type conversion from " << input_type
2574 << " to " << result_type;
2575 }
2576 break;
2577
2578 case Primitive::kPrimDouble:
2579 switch (input_type) {
2580 case Primitive::kPrimBoolean:
2581 // Boolean input is a result of code transformations.
2582 case Primitive::kPrimByte:
2583 case Primitive::kPrimShort:
2584 case Primitive::kPrimInt:
2585 case Primitive::kPrimChar:
2586 // Processing a Dex `int-to-double' instruction.
2587 locations->SetInAt(0, Location::RequiresRegister());
2588 locations->SetOut(Location::RequiresFpuRegister());
2589 break;
2590
2591 case Primitive::kPrimLong:
2592 // Processing a Dex `long-to-double' instruction.
2593 locations->SetInAt(0, Location::Any());
2594 locations->SetOut(Location::Any());
2595 break;
2596
2597 case Primitive::kPrimFloat:
2598 // Processing a Dex `float-to-double' instruction.
2599 locations->SetInAt(0, Location::RequiresFpuRegister());
2600 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2601 break;
2602
2603 default:
2604 LOG(FATAL) << "Unexpected type conversion from " << input_type
2605 << " to " << result_type;
2606 }
2607 break;
2608
2609 default:
2610 LOG(FATAL) << "Unexpected type conversion from " << input_type
2611 << " to " << result_type;
2612 }
2613 }
2614
2615 void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversion) {
2616 LocationSummary* locations = conversion->GetLocations();
2617 Location out = locations->Out();
2618 Location in = locations->InAt(0);
2619 Primitive::Type result_type = conversion->GetResultType();
2620 Primitive::Type input_type = conversion->GetInputType();
2621 DCHECK_NE(result_type, input_type);
2622 switch (result_type) {
2623 case Primitive::kPrimByte:
2624 switch (input_type) {
2625 case Primitive::kPrimLong:
2626 // Type conversion from long to byte is a result of code transformations.
2627 if (in.IsRegisterPair()) {
2628 __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
2629 } else {
2630 DCHECK(in.GetConstant()->IsLongConstant());
2631 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2632 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
2633 }
2634 break;
2635 case Primitive::kPrimBoolean:
2636 // Boolean input is a result of code transformations.
2637 case Primitive::kPrimShort:
2638 case Primitive::kPrimInt:
2639 case Primitive::kPrimChar:
2640 // Processing a Dex `int-to-byte' instruction.
2641 if (in.IsRegister()) {
2642 __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
2643 } else {
2644 DCHECK(in.GetConstant()->IsIntConstant());
2645 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2646 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
2647 }
2648 break;
2649
2650 default:
2651 LOG(FATAL) << "Unexpected type conversion from " << input_type
2652 << " to " << result_type;
2653 }
2654 break;
2655
2656 case Primitive::kPrimShort:
2657 switch (input_type) {
2658 case Primitive::kPrimLong:
2659 // Type conversion from long to short is a result of code transformations.
2660 if (in.IsRegisterPair()) {
2661 __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2662 } else if (in.IsDoubleStackSlot()) {
2663 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2664 } else {
2665 DCHECK(in.GetConstant()->IsLongConstant());
2666 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2667 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
2668 }
2669 break;
2670 case Primitive::kPrimBoolean:
2671 // Boolean input is a result of code transformations.
2672 case Primitive::kPrimByte:
2673 case Primitive::kPrimInt:
2674 case Primitive::kPrimChar:
2675 // Processing a Dex `int-to-short' instruction.
2676 if (in.IsRegister()) {
2677 __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>());
2678 } else if (in.IsStackSlot()) {
2679 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2680 } else {
2681 DCHECK(in.GetConstant()->IsIntConstant());
2682 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2683 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
2684 }
2685 break;
2686
2687 default:
2688 LOG(FATAL) << "Unexpected type conversion from " << input_type
2689 << " to " << result_type;
2690 }
2691 break;
2692
2693 case Primitive::kPrimInt:
2694 switch (input_type) {
2695 case Primitive::kPrimLong:
2696 // Processing a Dex `long-to-int' instruction.
2697 if (in.IsRegisterPair()) {
2698 __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2699 } else if (in.IsDoubleStackSlot()) {
2700 __ movl(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2701 } else {
2702 DCHECK(in.IsConstant());
2703 DCHECK(in.GetConstant()->IsLongConstant());
2704 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2705 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int32_t>(value)));
2706 }
2707 break;
2708
2709 case Primitive::kPrimFloat: {
2710 // Processing a Dex `float-to-int' instruction.
2711 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2712 Register output = out.AsRegister<Register>();
2713 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2714 NearLabel done, nan;
2715
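        // For out-of-range negative inputs cvttss2si already yields 0x80000000
        // (Integer.MIN_VALUE), which matches the Java narrowing rules, so only
        // NaN (-> 0) and inputs >= 2^31 (-> Integer.MAX_VALUE) need the
        // fix-ups below.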
2716 __ movl(output, Immediate(kPrimIntMax));
2717 // temp = int-to-float(output)
2718 __ cvtsi2ss(temp, output);
2719 // if input >= temp goto done
2720 __ comiss(input, temp);
2721 __ j(kAboveEqual, &done);
2722 // if input == NaN goto nan
2723 __ j(kUnordered, &nan);
2724 // output = float-to-int-truncate(input)
2725 __ cvttss2si(output, input);
2726 __ jmp(&done);
2727 __ Bind(&nan);
2728 // output = 0
2729 __ xorl(output, output);
2730 __ Bind(&done);
2731 break;
2732 }
2733
2734 case Primitive::kPrimDouble: {
2735 // Processing a Dex `double-to-int' instruction.
2736 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2737 Register output = out.AsRegister<Register>();
2738 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2739 NearLabel done, nan;
2740
2741 __ movl(output, Immediate(kPrimIntMax));
2742 // temp = int-to-double(output)
2743 __ cvtsi2sd(temp, output);
2744 // if input >= temp goto done
2745 __ comisd(input, temp);
2746 __ j(kAboveEqual, &done);
2747 // if input == NaN goto nan
2748 __ j(kUnordered, &nan);
2749 // output = double-to-int-truncate(input)
2750 __ cvttsd2si(output, input);
2751 __ jmp(&done);
2752 __ Bind(&nan);
2753 // output = 0
2754 __ xorl(output, output);
2755 __ Bind(&done);
2756 break;
2757 }
2758
2759 default:
2760 LOG(FATAL) << "Unexpected type conversion from " << input_type
2761 << " to " << result_type;
2762 }
2763 break;
2764
2765 case Primitive::kPrimLong:
2766 switch (input_type) {
2767 case Primitive::kPrimBoolean:
2768 // Boolean input is a result of code transformations.
2769 case Primitive::kPrimByte:
2770 case Primitive::kPrimShort:
2771 case Primitive::kPrimInt:
2772 case Primitive::kPrimChar:
2773 // Processing a Dex `int-to-long' instruction.
2774 DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX);
2775 DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX);
2776 DCHECK_EQ(in.AsRegister<Register>(), EAX);
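// cdq sign-extends EAX into EDX, leaving the 64-bit result in EDX:EAX.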
2777 __ cdq();
2778 break;
2779
2780 case Primitive::kPrimFloat:
2781 // Processing a Dex `float-to-long' instruction.
2782 codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
2783 CheckEntrypointTypes<kQuickF2l, int64_t, float>();
2784 break;
2785
2786 case Primitive::kPrimDouble:
2787 // Processing a Dex `double-to-long' instruction.
2788 codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
2789 CheckEntrypointTypes<kQuickD2l, int64_t, double>();
2790 break;
2791
2792 default:
2793 LOG(FATAL) << "Unexpected type conversion from " << input_type
2794 << " to " << result_type;
2795 }
2796 break;
2797
2798 case Primitive::kPrimChar:
2799 switch (input_type) {
2800 case Primitive::kPrimLong:
2801 // Type conversion from long to char is a result of code transformations.
2802 if (in.IsRegisterPair()) {
2803 __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2804 } else if (in.IsDoubleStackSlot()) {
2805 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2806 } else {
2807 DCHECK(in.GetConstant()->IsLongConstant());
2808 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2809 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
2810 }
2811 break;
2812 case Primitive::kPrimBoolean:
2813 // Boolean input is a result of code transformations.
2814 case Primitive::kPrimByte:
2815 case Primitive::kPrimShort:
2816 case Primitive::kPrimInt:
2817 // Processing a Dex `int-to-char' instruction.
2818 if (in.IsRegister()) {
2819 __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>());
2820 } else if (in.IsStackSlot()) {
2821 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2822 } else {
2823 DCHECK(in.GetConstant()->IsIntConstant());
2824 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2825 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
2826 }
2827 break;
2828
2829 default:
2830 LOG(FATAL) << "Unexpected type conversion from " << input_type
2831 << " to " << result_type;
2832 }
2833 break;
2834
2835 case Primitive::kPrimFloat:
2836 switch (input_type) {
2837 case Primitive::kPrimBoolean:
2838 // Boolean input is a result of code transformations.
2839 case Primitive::kPrimByte:
2840 case Primitive::kPrimShort:
2841 case Primitive::kPrimInt:
2842 case Primitive::kPrimChar:
2843 // Processing a Dex `int-to-float' instruction.
2844 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
2845 break;
2846
2847 case Primitive::kPrimLong: {
2848 // Processing a Dex `long-to-float' instruction.
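// A 64-bit integer cannot be converted with SSE cvtsi2ss on 32-bit x86, so the
// value is pushed onto the x87 FP stack with fild and stored back with fstps.
// The long-to-double case further down uses the same approach.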
2849 size_t adjustment = 0;
2850
2851 // Create stack space for the call to
2852 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below.
2853 // TODO: enhance register allocator to ask for stack temporaries.
2854 if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
2855 adjustment = Primitive::ComponentSize(Primitive::kPrimLong);
2856 __ subl(ESP, Immediate(adjustment));
2857 }
2858
2859 // Load the value to the FP stack, using temporaries if needed.
2860 PushOntoFPStack(in, 0, adjustment, false, true);
2861
2862 if (out.IsStackSlot()) {
2863 __ fstps(Address(ESP, out.GetStackIndex() + adjustment));
2864 } else {
2865 __ fstps(Address(ESP, 0));
2866 Location stack_temp = Location::StackSlot(0);
2867 codegen_->Move32(out, stack_temp);
2868 }
2869
2870 // Remove the temporary stack space we allocated.
2871 if (adjustment != 0) {
2872 __ addl(ESP, Immediate(adjustment));
2873 }
2874 break;
2875 }
2876
2877 case Primitive::kPrimDouble:
2878 // Processing a Dex `double-to-float' instruction.
2879 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2880 break;
2881
2882 default:
2883 LOG(FATAL) << "Unexpected type conversion from " << input_type
2884 << " to " << result_type;
2885 }
2886 break;
2887
2888 case Primitive::kPrimDouble:
2889 switch (input_type) {
2890 case Primitive::kPrimBoolean:
2891 // Boolean input is a result of code transformations.
2892 case Primitive::kPrimByte:
2893 case Primitive::kPrimShort:
2894 case Primitive::kPrimInt:
2895 case Primitive::kPrimChar:
2896 // Processing a Dex `int-to-double' instruction.
2897 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
2898 break;
2899
2900 case Primitive::kPrimLong: {
2901 // Processing a Dex `long-to-double' instruction.
2902 size_t adjustment = 0;
2903
2904 // Create stack space for the call to
2905 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below.
2906 // TODO: enhance register allocator to ask for stack temporaries.
2907 if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
2908 adjustment = Primitive::ComponentSize(Primitive::kPrimLong);
2909 __ subl(ESP, Immediate(adjustment));
2910 }
2911
2912 // Load the value to the FP stack, using temporaries if needed.
2913 PushOntoFPStack(in, 0, adjustment, false, true);
2914
2915 if (out.IsDoubleStackSlot()) {
2916 __ fstpl(Address(ESP, out.GetStackIndex() + adjustment));
2917 } else {
2918 __ fstpl(Address(ESP, 0));
2919 Location stack_temp = Location::DoubleStackSlot(0);
2920 codegen_->Move64(out, stack_temp);
2921 }
2922
2923 // Remove the temporary stack space we allocated.
2924 if (adjustment != 0) {
2925 __ addl(ESP, Immediate(adjustment));
2926 }
2927 break;
2928 }
2929
2930 case Primitive::kPrimFloat:
2931 // Processing a Dex `float-to-double' instruction.
2932 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2933 break;
2934
2935 default:
2936 LOG(FATAL) << "Unexpected type conversion from " << input_type
2937 << " to " << result_type;
2938 }
2939 break;
2940
2941 default:
2942 LOG(FATAL) << "Unexpected type conversion from " << input_type
2943 << " to " << result_type;
2944 }
2945 }
2946
2947 void LocationsBuilderX86::VisitAdd(HAdd* add) {
2948 LocationSummary* locations =
2949 new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall);
2950 switch (add->GetResultType()) {
2951 case Primitive::kPrimInt: {
2952 locations->SetInAt(0, Location::RequiresRegister());
2953 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
2954 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2955 break;
2956 }
2957
2958 case Primitive::kPrimLong: {
2959 locations->SetInAt(0, Location::RequiresRegister());
2960 locations->SetInAt(1, Location::Any());
2961 locations->SetOut(Location::SameAsFirstInput());
2962 break;
2963 }
2964
2965 case Primitive::kPrimFloat:
2966 case Primitive::kPrimDouble: {
2967 locations->SetInAt(0, Location::RequiresFpuRegister());
2968 if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
2969 DCHECK(add->InputAt(1)->IsEmittedAtUseSite());
2970 } else if (add->InputAt(1)->IsConstant()) {
2971 locations->SetInAt(1, Location::RequiresFpuRegister());
2972 } else {
2973 locations->SetInAt(1, Location::Any());
2974 }
2975 locations->SetOut(Location::SameAsFirstInput());
2976 break;
2977 }
2978
2979 default:
2980 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
2981 break;
2982 }
2983 }
2984
2985 void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
2986 LocationSummary* locations = add->GetLocations();
2987 Location first = locations->InAt(0);
2988 Location second = locations->InAt(1);
2989 Location out = locations->Out();
2990
2991 switch (add->GetResultType()) {
2992 case Primitive::kPrimInt: {
2993 if (second.IsRegister()) {
2994 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2995 __ addl(out.AsRegister<Register>(), second.AsRegister<Register>());
2996 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
2997 __ addl(out.AsRegister<Register>(), first.AsRegister<Register>());
2998 } else {
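// leal computes first + second into out without clobbering either input
// (and likewise first + constant in the branch below).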
2999 __ leal(out.AsRegister<Register>(), Address(
3000 first.AsRegister<Register>(), second.AsRegister<Register>(), TIMES_1, 0));
3001 }
3002 } else if (second.IsConstant()) {
3003 int32_t value = second.GetConstant()->AsIntConstant()->GetValue();
3004 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3005 __ addl(out.AsRegister<Register>(), Immediate(value));
3006 } else {
3007 __ leal(out.AsRegister<Register>(), Address(first.AsRegister<Register>(), value));
3008 }
3009 } else {
3010 DCHECK(first.Equals(locations->Out()));
3011 __ addl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3012 }
3013 break;
3014 }
3015
3016 case Primitive::kPrimLong: {
3017 if (second.IsRegisterPair()) {
3018 __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3019 __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3020 } else if (second.IsDoubleStackSlot()) {
3021 __ addl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3022 __ adcl(first.AsRegisterPairHigh<Register>(),
3023 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3024 } else {
3025 DCHECK(second.IsConstant()) << second;
3026 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3027 __ addl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3028 __ adcl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3029 }
3030 break;
3031 }
3032
3033 case Primitive::kPrimFloat: {
3034 if (second.IsFpuRegister()) {
3035 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3036 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3037 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3038 DCHECK(const_area->IsEmittedAtUseSite());
3039 __ addss(first.AsFpuRegister<XmmRegister>(),
3040 codegen_->LiteralFloatAddress(
3041 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3042 const_area->GetBaseMethodAddress(),
3043 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3044 } else {
3045 DCHECK(second.IsStackSlot());
3046 __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3047 }
3048 break;
3049 }
3050
3051 case Primitive::kPrimDouble: {
3052 if (second.IsFpuRegister()) {
3053 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3054 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3055 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3056 DCHECK(const_area->IsEmittedAtUseSite());
3057 __ addsd(first.AsFpuRegister<XmmRegister>(),
3058 codegen_->LiteralDoubleAddress(
3059 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3060 const_area->GetBaseMethodAddress(),
3061 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3062 } else {
3063 DCHECK(second.IsDoubleStackSlot());
3064 __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3065 }
3066 break;
3067 }
3068
3069 default:
3070 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3071 }
3072 }
3073
3074 void LocationsBuilderX86::VisitSub(HSub* sub) {
3075 LocationSummary* locations =
3076 new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall);
3077 switch (sub->GetResultType()) {
3078 case Primitive::kPrimInt:
3079 case Primitive::kPrimLong: {
3080 locations->SetInAt(0, Location::RequiresRegister());
3081 locations->SetInAt(1, Location::Any());
3082 locations->SetOut(Location::SameAsFirstInput());
3083 break;
3084 }
3085 case Primitive::kPrimFloat:
3086 case Primitive::kPrimDouble: {
3087 locations->SetInAt(0, Location::RequiresFpuRegister());
3088 if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3089 DCHECK(sub->InputAt(1)->IsEmittedAtUseSite());
3090 } else if (sub->InputAt(1)->IsConstant()) {
3091 locations->SetInAt(1, Location::RequiresFpuRegister());
3092 } else {
3093 locations->SetInAt(1, Location::Any());
3094 }
3095 locations->SetOut(Location::SameAsFirstInput());
3096 break;
3097 }
3098
3099 default:
3100 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3101 }
3102 }
3103
3104 void InstructionCodeGeneratorX86::VisitSub(HSub* sub) {
3105 LocationSummary* locations = sub->GetLocations();
3106 Location first = locations->InAt(0);
3107 Location second = locations->InAt(1);
3108 DCHECK(first.Equals(locations->Out()));
3109 switch (sub->GetResultType()) {
3110 case Primitive::kPrimInt: {
3111 if (second.IsRegister()) {
3112 __ subl(first.AsRegister<Register>(), second.AsRegister<Register>());
3113 } else if (second.IsConstant()) {
3114 __ subl(first.AsRegister<Register>(),
3115 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3116 } else {
3117 __ subl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3118 }
3119 break;
3120 }
3121
3122 case Primitive::kPrimLong: {
3123 if (second.IsRegisterPair()) {
3124 __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3125 __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3126 } else if (second.IsDoubleStackSlot()) {
3127 __ subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3128 __ sbbl(first.AsRegisterPairHigh<Register>(),
3129 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3130 } else {
3131 DCHECK(second.IsConstant()) << second;
3132 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3133 __ subl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3134 __ sbbl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3135 }
3136 break;
3137 }
3138
3139 case Primitive::kPrimFloat: {
3140 if (second.IsFpuRegister()) {
3141 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3142 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3143 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3144 DCHECK(const_area->IsEmittedAtUseSite());
3145 __ subss(first.AsFpuRegister<XmmRegister>(),
3146 codegen_->LiteralFloatAddress(
3147 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3148 const_area->GetBaseMethodAddress(),
3149 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3150 } else {
3151 DCHECK(second.IsStackSlot());
3152 __ subss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3153 }
3154 break;
3155 }
3156
3157 case Primitive::kPrimDouble: {
3158 if (second.IsFpuRegister()) {
3159 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3160 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3161 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3162 DCHECK(const_area->IsEmittedAtUseSite());
3163 __ subsd(first.AsFpuRegister<XmmRegister>(),
3164 codegen_->LiteralDoubleAddress(
3165 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3166 const_area->GetBaseMethodAddress(),
3167 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3168 } else {
3169 DCHECK(second.IsDoubleStackSlot());
3170 __ subsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3171 }
3172 break;
3173 }
3174
3175 default:
3176 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3177 }
3178 }
3179
3180 void LocationsBuilderX86::VisitMul(HMul* mul) {
3181 LocationSummary* locations =
3182 new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
3183 switch (mul->GetResultType()) {
3184 case Primitive::kPrimInt:
3185 locations->SetInAt(0, Location::RequiresRegister());
3186 locations->SetInAt(1, Location::Any());
3187 if (mul->InputAt(1)->IsIntConstant()) {
3188 // Can use 3 operand multiply.
3189 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3190 } else {
3191 locations->SetOut(Location::SameAsFirstInput());
3192 }
3193 break;
3194 case Primitive::kPrimLong: {
3195 locations->SetInAt(0, Location::RequiresRegister());
3196 locations->SetInAt(1, Location::Any());
3197 locations->SetOut(Location::SameAsFirstInput());
3198 // Needed for imul on 32 bits with a 64-bit output.
3199 locations->AddTemp(Location::RegisterLocation(EAX));
3200 locations->AddTemp(Location::RegisterLocation(EDX));
3201 break;
3202 }
3203 case Primitive::kPrimFloat:
3204 case Primitive::kPrimDouble: {
3205 locations->SetInAt(0, Location::RequiresFpuRegister());
3206 if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3207 DCHECK(mul->InputAt(1)->IsEmittedAtUseSite());
3208 } else if (mul->InputAt(1)->IsConstant()) {
3209 locations->SetInAt(1, Location::RequiresFpuRegister());
3210 } else {
3211 locations->SetInAt(1, Location::Any());
3212 }
3213 locations->SetOut(Location::SameAsFirstInput());
3214 break;
3215 }
3216
3217 default:
3218 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3219 }
3220 }
3221
3222 void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
3223 LocationSummary* locations = mul->GetLocations();
3224 Location first = locations->InAt(0);
3225 Location second = locations->InAt(1);
3226 Location out = locations->Out();
3227
3228 switch (mul->GetResultType()) {
3229 case Primitive::kPrimInt:
3230 // The constant may have ended up in a register, so test explicitly to avoid
3231 // problems where the output may not be the same as the first operand.
3232 if (mul->InputAt(1)->IsIntConstant()) {
3233 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3234 __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm);
3235 } else if (second.IsRegister()) {
3236 DCHECK(first.Equals(out));
3237 __ imull(first.AsRegister<Register>(), second.AsRegister<Register>());
3238 } else {
3239 DCHECK(second.IsStackSlot());
3240 DCHECK(first.Equals(out));
3241 __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3242 }
3243 break;
3244
3245 case Primitive::kPrimLong: {
3246 Register in1_hi = first.AsRegisterPairHigh<Register>();
3247 Register in1_lo = first.AsRegisterPairLow<Register>();
3248 Register eax = locations->GetTemp(0).AsRegister<Register>();
3249 Register edx = locations->GetTemp(1).AsRegister<Register>();
3250
3251 DCHECK_EQ(EAX, eax);
3252 DCHECK_EQ(EDX, edx);
3253
3254 // input: in1 - 64 bits, in2 - 64 bits.
3255 // output: in1
3256 // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo) * 2^32 + in1.lo * in2.lo
3257 // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
3258 // parts: in1.lo = (in1.lo * in2.lo)[31:0]
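// Illustrative C sketch of the same decomposition (not emitted code); the
// result is correct modulo 2^64 for signed and unsigned operands alike:
//   uint32_t a_lo = a, a_hi = a >> 32, b_lo = b, b_hi = b >> 32;
//   uint64_t lo_prod = (uint64_t)a_lo * b_lo;                         // mull
//   uint32_t hi = a_lo * b_hi + a_hi * b_lo + (uint32_t)(lo_prod >> 32);
//   uint64_t result = ((uint64_t)hi << 32) | (uint32_t)lo_prod;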
3259 if (second.IsConstant()) {
3260 DCHECK(second.GetConstant()->IsLongConstant());
3261
3262 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3263 int32_t low_value = Low32Bits(value);
3264 int32_t high_value = High32Bits(value);
3265 Immediate low(low_value);
3266 Immediate high(high_value);
3267
3268 __ movl(eax, high);
3269 // eax <- in1.lo * in2.hi
3270 __ imull(eax, in1_lo);
3271 // in1.hi <- in1.hi * in2.lo
3272 __ imull(in1_hi, low);
3273 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3274 __ addl(in1_hi, eax);
3275 // move in2_lo to eax to prepare for double precision
3276 __ movl(eax, low);
3277 // edx:eax <- in1.lo * in2.lo
3278 __ mull(in1_lo);
3279 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3280 __ addl(in1_hi, edx);
3281 // in1.lo <- (in1.lo * in2.lo)[31:0];
3282 __ movl(in1_lo, eax);
3283 } else if (second.IsRegisterPair()) {
3284 Register in2_hi = second.AsRegisterPairHigh<Register>();
3285 Register in2_lo = second.AsRegisterPairLow<Register>();
3286
3287 __ movl(eax, in2_hi);
3288 // eax <- in1.lo * in2.hi
3289 __ imull(eax, in1_lo);
3290 // in1.hi <- in1.hi * in2.lo
3291 __ imull(in1_hi, in2_lo);
3292 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3293 __ addl(in1_hi, eax);
3294 // move in1_lo to eax to prepare for double precision
3295 __ movl(eax, in1_lo);
3296 // edx:eax <- in1.lo * in2.lo
3297 __ mull(in2_lo);
3298 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3299 __ addl(in1_hi, edx);
3300 // in1.lo <- (in1.lo * in2.lo)[31:0];
3301 __ movl(in1_lo, eax);
3302 } else {
3303 DCHECK(second.IsDoubleStackSlot()) << second;
3304 Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize));
3305 Address in2_lo(ESP, second.GetStackIndex());
3306
3307 __ movl(eax, in2_hi);
3308 // eax <- in1.lo * in2.hi
3309 __ imull(eax, in1_lo);
3310 // in1.hi <- in1.hi * in2.lo
3311 __ imull(in1_hi, in2_lo);
3312 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3313 __ addl(in1_hi, eax);
3314 // move in1_lo to eax to prepare for double precision
3315 __ movl(eax, in1_lo);
3316 // edx:eax <- in1.lo * in2.lo
3317 __ mull(in2_lo);
3318 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3319 __ addl(in1_hi, edx);
3320 // in1.lo <- (in1.lo * in2.lo)[31:0];
3321 __ movl(in1_lo, eax);
3322 }
3323
3324 break;
3325 }
3326
3327 case Primitive::kPrimFloat: {
3328 DCHECK(first.Equals(locations->Out()));
3329 if (second.IsFpuRegister()) {
3330 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3331 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3332 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3333 DCHECK(const_area->IsEmittedAtUseSite());
3334 __ mulss(first.AsFpuRegister<XmmRegister>(),
3335 codegen_->LiteralFloatAddress(
3336 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3337 const_area->GetBaseMethodAddress(),
3338 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3339 } else {
3340 DCHECK(second.IsStackSlot());
3341 __ mulss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3342 }
3343 break;
3344 }
3345
3346 case Primitive::kPrimDouble: {
3347 DCHECK(first.Equals(locations->Out()));
3348 if (second.IsFpuRegister()) {
3349 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3350 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3351 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3352 DCHECK(const_area->IsEmittedAtUseSite());
3353 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3354 codegen_->LiteralDoubleAddress(
3355 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3356 const_area->GetBaseMethodAddress(),
3357 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3358 } else {
3359 DCHECK(second.IsDoubleStackSlot());
3360 __ mulsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3361 }
3362 break;
3363 }
3364
3365 default:
3366 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3367 }
3368 }
3369
3370 void InstructionCodeGeneratorX86::PushOntoFPStack(Location source,
3371 uint32_t temp_offset,
3372 uint32_t stack_adjustment,
3373 bool is_fp,
3374 bool is_wide) {
3375 if (source.IsStackSlot()) {
3376 DCHECK(!is_wide);
3377 if (is_fp) {
3378 __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3379 } else {
3380 __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3381 }
3382 } else if (source.IsDoubleStackSlot()) {
3383 DCHECK(is_wide);
3384 if (is_fp) {
3385 __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3386 } else {
3387 __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3388 }
3389 } else {
3390 // Write the value to the temporary location on the stack and load to FP stack.
3391 if (!is_wide) {
3392 Location stack_temp = Location::StackSlot(temp_offset);
3393 codegen_->Move32(stack_temp, source);
3394 if (is_fp) {
3395 __ flds(Address(ESP, temp_offset));
3396 } else {
3397 __ filds(Address(ESP, temp_offset));
3398 }
3399 } else {
3400 Location stack_temp = Location::DoubleStackSlot(temp_offset);
3401 codegen_->Move64(stack_temp, source);
3402 if (is_fp) {
3403 __ fldl(Address(ESP, temp_offset));
3404 } else {
3405 __ fildl(Address(ESP, temp_offset));
3406 }
3407 }
3408 }
3409 }
3410
3411 void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
3412 Primitive::Type type = rem->GetResultType();
3413 bool is_float = type == Primitive::kPrimFloat;
3414 size_t elem_size = Primitive::ComponentSize(type);
3415 LocationSummary* locations = rem->GetLocations();
3416 Location first = locations->InAt(0);
3417 Location second = locations->InAt(1);
3418 Location out = locations->Out();
3419
3420 // Create stack space for 2 elements.
3421 // TODO: enhance register allocator to ask for stack temporaries.
3422 __ subl(ESP, Immediate(2 * elem_size));
3423
3424 // Load the values to the FP stack in reverse order, using temporaries if needed.
3425 const bool is_wide = !is_float;
3426 PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp */ true, is_wide);
3427 PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp */ true, is_wide);
3428
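// fprem (rather than fprem1) is used because Java's floating-point remainder
// truncates toward zero like C's fmod, not the round-to-nearest IEEE remainder.
// fprem reduces the exponent by at most 63 per iteration, so it may have to be
// repeated; completion is reported through the C2 condition flag tested below.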
3429 // Loop doing FPREM until we stabilize.
3430 NearLabel retry;
3431 __ Bind(&retry);
3432 __ fprem();
3433
3434 // Move FP status to AX.
3435 __ fstsw();
3436
3437 // And see if the argument reduction is complete. This is signaled by the
3438 // C2 FPU flag bit set to 0.
3439 __ andl(EAX, Immediate(kC2ConditionMask));
3440 __ j(kNotEqual, &retry);
3441
3442 // We have settled on the final value. Retrieve it into an XMM register.
3443 // Store FP top of stack to real stack.
3444 if (is_float) {
3445 __ fsts(Address(ESP, 0));
3446 } else {
3447 __ fstl(Address(ESP, 0));
3448 }
3449
3450 // Pop the 2 items from the FP stack.
3451 __ fucompp();
3452
3453 // Load the value from the stack into an XMM register.
3454 DCHECK(out.IsFpuRegister()) << out;
3455 if (is_float) {
3456 __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3457 } else {
3458 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3459 }
3460
3461 // And remove the temporary stack space we allocated.
3462 __ addl(ESP, Immediate(2 * elem_size));
3463 }
3464
3465
3466 void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3467 DCHECK(instruction->IsDiv() || instruction->IsRem());
3468
3469 LocationSummary* locations = instruction->GetLocations();
3470 DCHECK(locations->InAt(1).IsConstant());
3471 DCHECK(locations->InAt(1).GetConstant()->IsIntConstant());
3472
3473 Register out_register = locations->Out().AsRegister<Register>();
3474 Register input_register = locations->InAt(0).AsRegister<Register>();
3475 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3476
3477 DCHECK(imm == 1 || imm == -1);
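// x % 1 and x % -1 are always 0; x / 1 is x and x / -1 is -x, so no idiv is needed.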
3478
3479 if (instruction->IsRem()) {
3480 __ xorl(out_register, out_register);
3481 } else {
3482 __ movl(out_register, input_register);
3483 if (imm == -1) {
3484 __ negl(out_register);
3485 }
3486 }
3487 }
3488
3489
3490 void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
3491 LocationSummary* locations = instruction->GetLocations();
3492
3493 Register out_register = locations->Out().AsRegister<Register>();
3494 Register input_register = locations->InAt(0).AsRegister<Register>();
3495 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3496 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3497 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
3498
3499 Register num = locations->GetTemp(0).AsRegister<Register>();
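// Round-toward-zero division by a power of two: negative dividends are biased
// by (abs_imm - 1) before the arithmetic shift. Illustrative values for imm = 4
// (not emitted code):
//   -7: leal -> -4, input < 0 so cmovl is skipped, sarl 2 -> -1
//    7: leal -> 10, cmovl restores 7,              sarl 2 ->  1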
3500
3501 __ leal(num, Address(input_register, abs_imm - 1));
3502 __ testl(input_register, input_register);
3503 __ cmovl(kGreaterEqual, num, input_register);
3504 int shift = CTZ(imm);
3505 __ sarl(num, Immediate(shift));
3506
3507 if (imm < 0) {
3508 __ negl(num);
3509 }
3510
3511 __ movl(out_register, num);
3512 }
3513
3514 void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3515 DCHECK(instruction->IsDiv() || instruction->IsRem());
3516
3517 LocationSummary* locations = instruction->GetLocations();
3518 int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3519
3520 Register eax = locations->InAt(0).AsRegister<Register>();
3521 Register out = locations->Out().AsRegister<Register>();
3522 Register num;
3523 Register edx;
3524
3525 if (instruction->IsDiv()) {
3526 edx = locations->GetTemp(0).AsRegister<Register>();
3527 num = locations->GetTemp(1).AsRegister<Register>();
3528 } else {
3529 edx = locations->Out().AsRegister<Register>();
3530 num = locations->GetTemp(0).AsRegister<Register>();
3531 }
3532
3533 DCHECK_EQ(EAX, eax);
3534 DCHECK_EQ(EDX, edx);
3535 if (instruction->IsDiv()) {
3536 DCHECK_EQ(EAX, out);
3537 } else {
3538 DCHECK_EQ(EDX, out);
3539 }
3540
3541 int64_t magic;
3542 int shift;
3543 CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
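// Illustrative sketch, assuming the well-known constants from Hacker's Delight
// (not emitted code): for imm = 7, magic = 0x92492493 and shift = 2, and the
// sequence below computes the truncated quotient roughly as
//   q = (int32_t)(((int64_t)(int32_t)0x92492493 * n) >> 32);  // imull
//   q += n;                  // imm > 0 && magic < 0
//   q >>= 2;                 // sarl by shift
//   q += (uint32_t)q >> 31;  // round toward zero for negative quotients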
3544
3545 // Save the numerator.
3546 __ movl(num, eax);
3547
3548 // EAX = magic
3549 __ movl(eax, Immediate(magic));
3550
3551 // EDX:EAX = magic * numerator
3552 __ imull(num);
3553
3554 if (imm > 0 && magic < 0) {
3555 // EDX += num
3556 __ addl(edx, num);
3557 } else if (imm < 0 && magic > 0) {
3558 __ subl(edx, num);
3559 }
3560
3561 // Shift if needed.
3562 if (shift != 0) {
3563 __ sarl(edx, Immediate(shift));
3564 }
3565
3566 // EDX += 1 if EDX < 0
3567 __ movl(eax, edx);
3568 __ shrl(edx, Immediate(31));
3569 __ addl(edx, eax);
3570
3571 if (instruction->IsRem()) {
3572 __ movl(eax, num);
3573 __ imull(edx, Immediate(imm));
3574 __ subl(eax, edx);
3575 __ movl(edx, eax);
3576 } else {
3577 __ movl(eax, edx);
3578 }
3579 }
3580
3581 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3582 DCHECK(instruction->IsDiv() || instruction->IsRem());
3583
3584 LocationSummary* locations = instruction->GetLocations();
3585 Location out = locations->Out();
3586 Location first = locations->InAt(0);
3587 Location second = locations->InAt(1);
3588 bool is_div = instruction->IsDiv();
3589
3590 switch (instruction->GetResultType()) {
3591 case Primitive::kPrimInt: {
3592 DCHECK_EQ(EAX, first.AsRegister<Register>());
3593 DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
3594
3595 if (second.IsConstant()) {
3596 int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
3597
3598 if (imm == 0) {
3599 // Do not generate anything for 0; DivZeroCheck makes this code unreachable.
3600 } else if (imm == 1 || imm == -1) {
3601 DivRemOneOrMinusOne(instruction);
3602 } else if (is_div && IsPowerOfTwo(AbsOrMin(imm))) {
3603 DivByPowerOfTwo(instruction->AsDiv());
3604 } else {
3605 DCHECK(imm <= -2 || imm >= 2);
3606 GenerateDivRemWithAnyConstant(instruction);
3607 }
3608 } else {
3609 SlowPathCode* slow_path = new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(
3610 instruction, out.AsRegister<Register>(), is_div);
3611 codegen_->AddSlowPath(slow_path);
3612
3613 Register second_reg = second.AsRegister<Register>();
3614 // 0x80000000/-1 triggers an arithmetic exception!
3615 // Dividing by -1 is actually negation and -0x80000000 = 0x80000000, so
3616 // it's safe to just use negl instead of more complex comparisons.
3617
3618 __ cmpl(second_reg, Immediate(-1));
3619 __ j(kEqual, slow_path->GetEntryLabel());
3620
3621 // edx:eax <- sign-extended of eax
3622 __ cdq();
3623 // eax = quotient, edx = remainder
3624 __ idivl(second_reg);
3625 __ Bind(slow_path->GetExitLabel());
3626 }
3627 break;
3628 }
3629
3630 case Primitive::kPrimLong: {
3631 InvokeRuntimeCallingConvention calling_convention;
3632 DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
3633 DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
3634 DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
3635 DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
3636 DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
3637 DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
3638
3639 if (is_div) {
3640 codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc());
3641 CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
3642 } else {
3643 codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc());
3644 CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
3645 }
3646 break;
3647 }
3648
3649 default:
3650 LOG(FATAL) << "Unexpected type for GenerateDivRemIntegral " << instruction->GetResultType();
3651 }
3652 }
3653
3654 void LocationsBuilderX86::VisitDiv(HDiv* div) {
3655 LocationSummary::CallKind call_kind = (div->GetResultType() == Primitive::kPrimLong)
3656 ? LocationSummary::kCallOnMainOnly
3657 : LocationSummary::kNoCall;
3658 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
3659
3660 switch (div->GetResultType()) {
3661 case Primitive::kPrimInt: {
3662 locations->SetInAt(0, Location::RegisterLocation(EAX));
3663 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3664 locations->SetOut(Location::SameAsFirstInput());
3665 // The idiv instruction uses edx:eax as the dividend.
3666 locations->AddTemp(Location::RegisterLocation(EDX));
3667 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3668 // which enforces results to be in EAX and EDX, things are simpler if we use EAX also as
3669 // output and request another temp.
3670 if (div->InputAt(1)->IsIntConstant()) {
3671 locations->AddTemp(Location::RequiresRegister());
3672 }
3673 break;
3674 }
3675 case Primitive::kPrimLong: {
3676 InvokeRuntimeCallingConvention calling_convention;
3677 locations->SetInAt(0, Location::RegisterPairLocation(
3678 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
3679 locations->SetInAt(1, Location::RegisterPairLocation(
3680 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
3681 // Runtime helper puts the result in EAX, EDX.
3682 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3683 break;
3684 }
3685 case Primitive::kPrimFloat:
3686 case Primitive::kPrimDouble: {
3687 locations->SetInAt(0, Location::RequiresFpuRegister());
3688 if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3689 DCHECK(div->InputAt(1)->IsEmittedAtUseSite());
3690 } else if (div->InputAt(1)->IsConstant()) {
3691 locations->SetInAt(1, Location::RequiresFpuRegister());
3692 } else {
3693 locations->SetInAt(1, Location::Any());
3694 }
3695 locations->SetOut(Location::SameAsFirstInput());
3696 break;
3697 }
3698
3699 default:
3700 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3701 }
3702 }
3703
3704 void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
3705 LocationSummary* locations = div->GetLocations();
3706 Location first = locations->InAt(0);
3707 Location second = locations->InAt(1);
3708
3709 switch (div->GetResultType()) {
3710 case Primitive::kPrimInt:
3711 case Primitive::kPrimLong: {
3712 GenerateDivRemIntegral(div);
3713 break;
3714 }
3715
3716 case Primitive::kPrimFloat: {
3717 if (second.IsFpuRegister()) {
3718 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3719 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3720 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
3721 DCHECK(const_area->IsEmittedAtUseSite());
3722 __ divss(first.AsFpuRegister<XmmRegister>(),
3723 codegen_->LiteralFloatAddress(
3724 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3725 const_area->GetBaseMethodAddress(),
3726 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3727 } else {
3728 DCHECK(second.IsStackSlot());
3729 __ divss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3730 }
3731 break;
3732 }
3733
3734 case Primitive::kPrimDouble: {
3735 if (second.IsFpuRegister()) {
3736 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3737 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3738 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
3739 DCHECK(const_area->IsEmittedAtUseSite());
3740 __ divsd(first.AsFpuRegister<XmmRegister>(),
3741 codegen_->LiteralDoubleAddress(
3742 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3743 const_area->GetBaseMethodAddress(),
3744 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3745 } else {
3746 DCHECK(second.IsDoubleStackSlot());
3747 __ divsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3748 }
3749 break;
3750 }
3751
3752 default:
3753 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3754 }
3755 }
3756
3757 void LocationsBuilderX86::VisitRem(HRem* rem) {
3758 Primitive::Type type = rem->GetResultType();
3759
3760 LocationSummary::CallKind call_kind = (rem->GetResultType() == Primitive::kPrimLong)
3761 ? LocationSummary::kCallOnMainOnly
3762 : LocationSummary::kNoCall;
3763 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
3764
3765 switch (type) {
3766 case Primitive::kPrimInt: {
3767 locations->SetInAt(0, Location::RegisterLocation(EAX));
3768 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
3769 locations->SetOut(Location::RegisterLocation(EDX));
3770 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3771 // which enforces results to be in EAX and EDX, things are simpler if we use EDX also as
3772 // output and request another temp.
3773 if (rem->InputAt(1)->IsIntConstant()) {
3774 locations->AddTemp(Location::RequiresRegister());
3775 }
3776 break;
3777 }
3778 case Primitive::kPrimLong: {
3779 InvokeRuntimeCallingConvention calling_convention;
3780 locations->SetInAt(0, Location::RegisterPairLocation(
3781 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
3782 locations->SetInAt(1, Location::RegisterPairLocation(
3783 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
3784 // Runtime helper puts the result in EAX, EDX.
3785 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3786 break;
3787 }
3788 case Primitive::kPrimDouble:
3789 case Primitive::kPrimFloat: {
3790 locations->SetInAt(0, Location::Any());
3791 locations->SetInAt(1, Location::Any());
3792 locations->SetOut(Location::RequiresFpuRegister());
3793 locations->AddTemp(Location::RegisterLocation(EAX));
3794 break;
3795 }
3796
3797 default:
3798 LOG(FATAL) << "Unexpected rem type " << type;
3799 }
3800 }
3801
3802 void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
3803 Primitive::Type type = rem->GetResultType();
3804 switch (type) {
3805 case Primitive::kPrimInt:
3806 case Primitive::kPrimLong: {
3807 GenerateDivRemIntegral(rem);
3808 break;
3809 }
3810 case Primitive::kPrimFloat:
3811 case Primitive::kPrimDouble: {
3812 GenerateRemFP(rem);
3813 break;
3814 }
3815 default:
3816 LOG(FATAL) << "Unexpected rem type " << type;
3817 }
3818 }
3819
3820 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3821 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
3822 switch (instruction->GetType()) {
3823 case Primitive::kPrimBoolean:
3824 case Primitive::kPrimByte:
3825 case Primitive::kPrimChar:
3826 case Primitive::kPrimShort:
3827 case Primitive::kPrimInt: {
3828 locations->SetInAt(0, Location::Any());
3829 break;
3830 }
3831 case Primitive::kPrimLong: {
3832 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
3833 if (!instruction->IsConstant()) {
3834 locations->AddTemp(Location::RequiresRegister());
3835 }
3836 break;
3837 }
3838 default:
3839 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
3840 }
3841 }
3842
3843 void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3844 SlowPathCode* slow_path = new (GetGraph()->GetArena()) DivZeroCheckSlowPathX86(instruction);
3845 codegen_->AddSlowPath(slow_path);
3846
3847 LocationSummary* locations = instruction->GetLocations();
3848 Location value = locations->InAt(0);
3849
3850 switch (instruction->GetType()) {
3851 case Primitive::kPrimBoolean:
3852 case Primitive::kPrimByte:
3853 case Primitive::kPrimChar:
3854 case Primitive::kPrimShort:
3855 case Primitive::kPrimInt: {
3856 if (value.IsRegister()) {
3857 __ testl(value.AsRegister<Register>(), value.AsRegister<Register>());
3858 __ j(kEqual, slow_path->GetEntryLabel());
3859 } else if (value.IsStackSlot()) {
3860 __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
3861 __ j(kEqual, slow_path->GetEntryLabel());
3862 } else {
3863 DCHECK(value.IsConstant()) << value;
3864 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
3865 __ jmp(slow_path->GetEntryLabel());
3866 }
3867 }
3868 break;
3869 }
3870 case Primitive::kPrimLong: {
3871 if (value.IsRegisterPair()) {
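// A 64-bit value is zero if and only if the OR of its two halves is zero.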
3872 Register temp = locations->GetTemp(0).AsRegister<Register>();
3873 __ movl(temp, value.AsRegisterPairLow<Register>());
3874 __ orl(temp, value.AsRegisterPairHigh<Register>());
3875 __ j(kEqual, slow_path->GetEntryLabel());
3876 } else {
3877 DCHECK(value.IsConstant()) << value;
3878 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
3879 __ jmp(slow_path->GetEntryLabel());
3880 }
3881 }
3882 break;
3883 }
3884 default:
3885 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
3886 }
3887 }
3888
3889 void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
3890 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3891
3892 LocationSummary* locations =
3893 new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
3894
3895 switch (op->GetResultType()) {
3896 case Primitive::kPrimInt:
3897 case Primitive::kPrimLong: {
3898 // Can't have Location::Any() and output SameAsFirstInput()
3899 locations->SetInAt(0, Location::RequiresRegister());
3900 // The shift count needs to be in CL or a constant.
3901 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
3902 locations->SetOut(Location::SameAsFirstInput());
3903 break;
3904 }
3905 default:
3906 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
3907 }
3908 }
3909
3910 void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
3911 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3912
3913 LocationSummary* locations = op->GetLocations();
3914 Location first = locations->InAt(0);
3915 Location second = locations->InAt(1);
3916 DCHECK(first.Equals(locations->Out()));
3917
3918 switch (op->GetResultType()) {
3919 case Primitive::kPrimInt: {
3920 DCHECK(first.IsRegister());
3921 Register first_reg = first.AsRegister<Register>();
3922 if (second.IsRegister()) {
3923 Register second_reg = second.AsRegister<Register>();
3924 DCHECK_EQ(ECX, second_reg);
3925 if (op->IsShl()) {
3926 __ shll(first_reg, second_reg);
3927 } else if (op->IsShr()) {
3928 __ sarl(first_reg, second_reg);
3929 } else {
3930 __ shrl(first_reg, second_reg);
3931 }
3932 } else {
3933 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance;
3934 if (shift == 0) {
3935 return;
3936 }
3937 Immediate imm(shift);
3938 if (op->IsShl()) {
3939 __ shll(first_reg, imm);
3940 } else if (op->IsShr()) {
3941 __ sarl(first_reg, imm);
3942 } else {
3943 __ shrl(first_reg, imm);
3944 }
3945 }
3946 break;
3947 }
3948 case Primitive::kPrimLong: {
3949 if (second.IsRegister()) {
3950 Register second_reg = second.AsRegister<Register>();
3951 DCHECK_EQ(ECX, second_reg);
3952 if (op->IsShl()) {
3953 GenerateShlLong(first, second_reg);
3954 } else if (op->IsShr()) {
3955 GenerateShrLong(first, second_reg);
3956 } else {
3957 GenerateUShrLong(first, second_reg);
3958 }
3959 } else {
3960 // Shift by a constant.
3961 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
3962 // Nothing to do if the shift is 0, as the input is already the output.
3963 if (shift != 0) {
3964 if (op->IsShl()) {
3965 GenerateShlLong(first, shift);
3966 } else if (op->IsShr()) {
3967 GenerateShrLong(first, shift);
3968 } else {
3969 GenerateUShrLong(first, shift);
3970 }
3971 }
3972 }
3973 break;
3974 }
3975 default:
3976 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
3977 }
3978 }
3979
3980 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
3981 Register low = loc.AsRegisterPairLow<Register>();
3982 Register high = loc.AsRegisterPairHigh<Register>();
3983 if (shift == 1) {
3984 // This is just an addition.
3985 __ addl(low, low);
3986 __ adcl(high, high);
3987 } else if (shift == 32) {
3988 // Shift by 32 is easy. High gets low, and low gets 0.
3989 codegen_->EmitParallelMoves(
3990 loc.ToLow(),
3991 loc.ToHigh(),
3992 Primitive::kPrimInt,
3993 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
3994 loc.ToLow(),
3995 Primitive::kPrimInt);
3996 } else if (shift > 32) {
3997 // Low part becomes 0. High part is low part << (shift-32).
3998 __ movl(high, low);
3999 __ shll(high, Immediate(shift - 32));
4000 __ xorl(low, low);
4001 } else {
4002 // Between 1 and 31.
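// shld shifts `high` left by `shift`, filling the vacated low-order bits with
// the most significant bits of `low`; combined with shll on `low` this forms
// the full 64-bit left shift.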
4003 __ shld(high, low, Immediate(shift));
4004 __ shll(low, Immediate(shift));
4005 }
4006 }
4007
4008 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
4009 NearLabel done;
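// x86 masks variable shift counts to 5 bits, so shld/shll alone only handle
// counts 0-31; the test against bit 5 (value 32) below moves the low half into
// the high half and zeroes the low half when the count is 32-63. The Shr and
// UShr helpers that follow use the same pattern.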
4010 __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
4011 __ shll(loc.AsRegisterPairLow<Register>(), shifter);
4012 __ testl(shifter, Immediate(32));
4013 __ j(kEqual, &done);
4014 __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>());
4015 __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0));
4016 __ Bind(&done);
4017 }
4018
4019 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
4020 Register low = loc.AsRegisterPairLow<Register>();
4021 Register high = loc.AsRegisterPairHigh<Register>();
4022 if (shift == 32) {
4023 // Need to copy the sign.
4024 DCHECK_NE(low, high);
4025 __ movl(low, high);
4026 __ sarl(high, Immediate(31));
4027 } else if (shift > 32) {
4028 DCHECK_NE(low, high);
4029 // High part becomes sign. Low part is shifted by shift - 32.
4030 __ movl(low, high);
4031 __ sarl(high, Immediate(31));
4032 __ sarl(low, Immediate(shift - 32));
4033 } else {
4034 // Between 1 and 31.
4035 __ shrd(low, high, Immediate(shift));
4036 __ sarl(high, Immediate(shift));
4037 }
4038 }
4039
4040 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
4041 NearLabel done;
4042 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4043 __ sarl(loc.AsRegisterPairHigh<Register>(), shifter);
4044 __ testl(shifter, Immediate(32));
4045 __ j(kEqual, &done);
4046 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4047 __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31));
4048 __ Bind(&done);
4049 }
4050
4051 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
4052 Register low = loc.AsRegisterPairLow<Register>();
4053 Register high = loc.AsRegisterPairHigh<Register>();
4054 if (shift == 32) {
4055 // Shift by 32 is easy. Low gets high, and high gets 0.
4056 codegen_->EmitParallelMoves(
4057 loc.ToHigh(),
4058 loc.ToLow(),
4059 Primitive::kPrimInt,
4060 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4061 loc.ToHigh(),
4062 Primitive::kPrimInt);
4063 } else if (shift > 32) {
4064 // Low part is high >> (shift - 32). High part becomes 0.
4065 __ movl(low, high);
4066 __ shrl(low, Immediate(shift - 32));
4067 __ xorl(high, high);
4068 } else {
4069 // Between 1 and 31.
4070 __ shrd(low, high, Immediate(shift));
4071 __ shrl(high, Immediate(shift));
4072 }
4073 }
4074
4075 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
4076 NearLabel done;
4077 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4078 __ shrl(loc.AsRegisterPairHigh<Register>(), shifter);
4079 __ testl(shifter, Immediate(32));
4080 __ j(kEqual, &done);
4081 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4082 __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0));
4083 __ Bind(&done);
4084 }
4085
4086 void LocationsBuilderX86::VisitRor(HRor* ror) {
4087 LocationSummary* locations =
4088 new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
4089
4090 switch (ror->GetResultType()) {
4091 case Primitive::kPrimLong:
4092 // Add the temporary needed.
4093 locations->AddTemp(Location::RequiresRegister());
4094 FALLTHROUGH_INTENDED;
4095 case Primitive::kPrimInt:
4096 locations->SetInAt(0, Location::RequiresRegister());
4097 // The shift count needs to be in CL (unless it is a constant).
4098 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1)));
4099 locations->SetOut(Location::SameAsFirstInput());
4100 break;
4101 default:
4102 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4103 UNREACHABLE();
4104 }
4105 }
4106
4107 void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
4108 LocationSummary* locations = ror->GetLocations();
4109 Location first = locations->InAt(0);
4110 Location second = locations->InAt(1);
4111
4112 if (ror->GetResultType() == Primitive::kPrimInt) {
4113 Register first_reg = first.AsRegister<Register>();
4114 if (second.IsRegister()) {
4115 Register second_reg = second.AsRegister<Register>();
4116 __ rorl(first_reg, second_reg);
4117 } else {
4118 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4119 __ rorl(first_reg, imm);
4120 }
4121 return;
4122 }
4123
4124 DCHECK_EQ(ror->GetResultType(), Primitive::kPrimLong);
4125 Register first_reg_lo = first.AsRegisterPairLow<Register>();
4126 Register first_reg_hi = first.AsRegisterPairHigh<Register>();
4127 Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
4128 if (second.IsRegister()) {
4129 Register second_reg = second.AsRegister<Register>();
4130 DCHECK_EQ(second_reg, ECX);
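// Rotate right on a 64-bit pair: shrd each half by (count mod 32), feeding in
// bits from the other half, then swap the halves with cmov when bit 5 of the
// count is set (a rotation by 32 or more).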
4131 __ movl(temp_reg, first_reg_hi);
4132 __ shrd(first_reg_hi, first_reg_lo, second_reg);
4133 __ shrd(first_reg_lo, temp_reg, second_reg);
4134 __ movl(temp_reg, first_reg_hi);
4135 __ testl(second_reg, Immediate(32));
4136 __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
4137 __ cmovl(kNotEqual, first_reg_lo, temp_reg);
4138 } else {
4139 int32_t shift_amt = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4140 if (shift_amt == 0) {
4141 // Already fine.
4142 return;
4143 }
4144 if (shift_amt == 32) {
4145 // Just swap.
4146 __ movl(temp_reg, first_reg_lo);
4147 __ movl(first_reg_lo, first_reg_hi);
4148 __ movl(first_reg_hi, temp_reg);
4149 return;
4150 }
4151
4152 Immediate imm(shift_amt);
4153 // Save the contents of the low register.
4154 __ movl(temp_reg, first_reg_lo);
4155
4156 // Shift right into low, feeding bits from high.
4157 __ shrd(first_reg_lo, first_reg_hi, imm);
4158
4159 // Shift right into high, feeding bits from the original low.
4160 __ shrd(first_reg_hi, temp_reg, imm);
4161
4162 // Swap if needed.
4163 if (shift_amt > 32) {
4164 __ movl(temp_reg, first_reg_lo);
4165 __ movl(first_reg_lo, first_reg_hi);
4166 __ movl(first_reg_hi, temp_reg);
4167 }
4168 }
4169 }
4170
4171 void LocationsBuilderX86::VisitShl(HShl* shl) {
4172 HandleShift(shl);
4173 }
4174
4175 void InstructionCodeGeneratorX86::VisitShl(HShl* shl) {
4176 HandleShift(shl);
4177 }
4178
4179 void LocationsBuilderX86::VisitShr(HShr* shr) {
4180 HandleShift(shr);
4181 }
4182
4183 void InstructionCodeGeneratorX86::VisitShr(HShr* shr) {
4184 HandleShift(shr);
4185 }
4186
4187 void LocationsBuilderX86::VisitUShr(HUShr* ushr) {
4188 HandleShift(ushr);
4189 }
4190
4191 void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
4192 HandleShift(ushr);
4193 }
4194
4195 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
4196 LocationSummary* locations =
4197 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
4198 locations->SetOut(Location::RegisterLocation(EAX));
4199 if (instruction->IsStringAlloc()) {
4200 locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
4201 } else {
4202 InvokeRuntimeCallingConvention calling_convention;
4203 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4204 }
4205 }
4206
4207 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
4208 // Note: if heap poisoning is enabled, the entry point takes care
4209 // of poisoning the reference.
4210 if (instruction->IsStringAlloc()) {
4211 // String is allocated through StringFactory. Call NewEmptyString entry point.
4212 Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
4213 MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize);
4214 __ fs()->movl(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString)));
4215 __ call(Address(temp, code_offset.Int32Value()));
4216 codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
4217 } else {
4218 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4219 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4220 DCHECK(!codegen_->IsLeafMethod());
4221 }
4222 }
4223
4224 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
4225 LocationSummary* locations =
4226 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
4227 locations->SetOut(Location::RegisterLocation(EAX));
4228 InvokeRuntimeCallingConvention calling_convention;
4229 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4230 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4231 }
4232
4233 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
4234 // Note: if heap poisoning is enabled, the entry point takes care
4235 // of poisoning the reference.
4236 QuickEntrypointEnum entrypoint =
4237 CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
4238 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4239 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4240 DCHECK(!codegen_->IsLeafMethod());
4241 }
4242
4243 void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
4244 LocationSummary* locations =
4245 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4246 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4247 if (location.IsStackSlot()) {
4248 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4249 } else if (location.IsDoubleStackSlot()) {
4250 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4251 }
4252 locations->SetOut(location);
4253 }
4254
4255 void InstructionCodeGeneratorX86::VisitParameterValue(
4256 HParameterValue* instruction ATTRIBUTE_UNUSED) {
4257 }
4258
4259 void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) {
4260 LocationSummary* locations =
4261 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4262 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4263 }
4264
4265 void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4266 }
4267
4268 void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
4269 LocationSummary* locations =
4270 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4271 locations->SetInAt(0, Location::RequiresRegister());
4272 locations->SetOut(Location::RequiresRegister());
4273 }
4274
4275 void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
4276 LocationSummary* locations = instruction->GetLocations();
4277 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4278 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4279 instruction->GetIndex(), kX86PointerSize).SizeValue();
4280 __ movl(locations->Out().AsRegister<Register>(),
4281 Address(locations->InAt(0).AsRegister<Register>(), method_offset));
4282 } else {
4283 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4284 instruction->GetIndex(), kX86PointerSize));
4285 __ movl(locations->Out().AsRegister<Register>(),
4286 Address(locations->InAt(0).AsRegister<Register>(),
4287 mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
4288 // temp = temp->GetImtEntryAt(method_offset);
4289 __ movl(locations->Out().AsRegister<Register>(),
4290 Address(locations->Out().AsRegister<Register>(), method_offset));
4291 }
4292 }
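// Illustrative sketch of the two lookups above (editorial, mirrors the emitted movl's):
//   vtable: out = *(klass + EmbeddedVTableEntryOffset(index, kX86PointerSize))
//   IMT:    imt = *(klass + ImtPtrOffset(kX86PointerSize))
//           out = *(imt + ImTable::OffsetOfElement(index, kX86PointerSize))
// where `klass` stands for the register holding the input class pointer.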
4293
4294 void LocationsBuilderX86::VisitNot(HNot* not_) {
4295 LocationSummary* locations =
4296 new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
4297 locations->SetInAt(0, Location::RequiresRegister());
4298 locations->SetOut(Location::SameAsFirstInput());
4299 }
4300
4301 void InstructionCodeGeneratorX86::VisitNot(HNot* not_) {
4302 LocationSummary* locations = not_->GetLocations();
4303 Location in = locations->InAt(0);
4304 Location out = locations->Out();
4305 DCHECK(in.Equals(out));
4306 switch (not_->GetResultType()) {
4307 case Primitive::kPrimInt:
4308 __ notl(out.AsRegister<Register>());
4309 break;
4310
4311 case Primitive::kPrimLong:
4312 __ notl(out.AsRegisterPairLow<Register>());
4313 __ notl(out.AsRegisterPairHigh<Register>());
4314 break;
4315
4316 default:
4317 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4318 }
4319 }
4320
4321 void LocationsBuilderX86::VisitBooleanNot(HBooleanNot* bool_not) {
4322 LocationSummary* locations =
4323 new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall);
4324 locations->SetInAt(0, Location::RequiresRegister());
4325 locations->SetOut(Location::SameAsFirstInput());
4326 }
4327
4328 void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) {
4329 LocationSummary* locations = bool_not->GetLocations();
4330 Location in = locations->InAt(0);
4331 Location out = locations->Out();
4332 DCHECK(in.Equals(out));
4333 __ xorl(out.AsRegister<Register>(), Immediate(1));
4334 }
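// Editorial note: the single `xorl out, 1` above relies on the HBooleanNot input being a
// canonical boolean (0 or 1), so it simply flips 0 <-> 1.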
4335
4336 void LocationsBuilderX86::VisitCompare(HCompare* compare) {
4337 LocationSummary* locations =
4338 new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
4339 switch (compare->InputAt(0)->GetType()) {
4340 case Primitive::kPrimBoolean:
4341 case Primitive::kPrimByte:
4342 case Primitive::kPrimShort:
4343 case Primitive::kPrimChar:
4344 case Primitive::kPrimInt:
4345 case Primitive::kPrimLong: {
4346 locations->SetInAt(0, Location::RequiresRegister());
4347 locations->SetInAt(1, Location::Any());
4348 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4349 break;
4350 }
4351 case Primitive::kPrimFloat:
4352 case Primitive::kPrimDouble: {
4353 locations->SetInAt(0, Location::RequiresFpuRegister());
4354 if (compare->InputAt(1)->IsX86LoadFromConstantTable()) {
4355 DCHECK(compare->InputAt(1)->IsEmittedAtUseSite());
4356 } else if (compare->InputAt(1)->IsConstant()) {
4357 locations->SetInAt(1, Location::RequiresFpuRegister());
4358 } else {
4359 locations->SetInAt(1, Location::Any());
4360 }
4361 locations->SetOut(Location::RequiresRegister());
4362 break;
4363 }
4364 default:
4365 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
4366 }
4367 }
4368
4369 void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
4370 LocationSummary* locations = compare->GetLocations();
4371 Register out = locations->Out().AsRegister<Register>();
4372 Location left = locations->InAt(0);
4373 Location right = locations->InAt(1);
4374
4375 NearLabel less, greater, done;
4376 Condition less_cond = kLess;
4377
4378 switch (compare->InputAt(0)->GetType()) {
4379 case Primitive::kPrimBoolean:
4380 case Primitive::kPrimByte:
4381 case Primitive::kPrimShort:
4382 case Primitive::kPrimChar:
4383 case Primitive::kPrimInt: {
4384 codegen_->GenerateIntCompare(left, right);
4385 break;
4386 }
4387 case Primitive::kPrimLong: {
4388 Register left_low = left.AsRegisterPairLow<Register>();
4389 Register left_high = left.AsRegisterPairHigh<Register>();
4390 int32_t val_low = 0;
4391 int32_t val_high = 0;
4392 bool right_is_const = false;
4393
4394 if (right.IsConstant()) {
4395 DCHECK(right.GetConstant()->IsLongConstant());
4396 right_is_const = true;
4397 int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
4398 val_low = Low32Bits(val);
4399 val_high = High32Bits(val);
4400 }
4401
4402 if (right.IsRegisterPair()) {
4403 __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
4404 } else if (right.IsDoubleStackSlot()) {
4405 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
4406 } else {
4407 DCHECK(right_is_const) << right;
4408 codegen_->Compare32BitValue(left_high, val_high);
4409 }
4410 __ j(kLess, &less); // Signed compare.
4411 __ j(kGreater, &greater); // Signed compare.
4412 if (right.IsRegisterPair()) {
4413 __ cmpl(left_low, right.AsRegisterPairLow<Register>());
4414 } else if (right.IsDoubleStackSlot()) {
4415 __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
4416 } else {
4417 DCHECK(right_is_const) << right;
4418 codegen_->Compare32BitValue(left_low, val_low);
4419 }
4420 less_cond = kBelow; // for CF (unsigned).
4421 break;
4422 }
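// Editorial sketch of the 64-bit path above, combined with the -1/0/+1 materialization
// emitted after this switch (illustrative C, not generated code):
//   if (left_high != right_high) return (left_high < right_high) ? -1 : 1;  // signed halves
//   if (left_low != right_low)   return (left_low < right_low) ? -1 : 1;    // unsigned halves
//   return 0;
// The unsigned low-word comparison is why `less_cond` switches to kBelow here.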
4423 case Primitive::kPrimFloat: {
4424 GenerateFPCompare(left, right, compare, false);
4425 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
4426 less_cond = kBelow; // for CF (floats).
4427 break;
4428 }
4429 case Primitive::kPrimDouble: {
4430 GenerateFPCompare(left, right, compare, true);
4431 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
4432 less_cond = kBelow; // for CF (floats).
4433 break;
4434 }
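// Editorial note for the float/double cases above: GenerateFPCompare is assumed to leave the
// flags in the unsigned-style layout produced by ucomiss/ucomisd, so kBelow means "less".
// An unordered result (a NaN operand) is redirected to `greater` or `less` up front,
// matching the gt/lt bias of the original cmpg-/cmpl- style comparison.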
4435 default:
4436 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
4437 }
4438
4439 __ movl(out, Immediate(0));
4440 __ j(kEqual, &done);
4441 __ j(less_cond, &less);
4442
4443 __ Bind(&greater);
4444 __ movl(out, Immediate(1));
4445 __ jmp(&done);
4446
4447 __ Bind(&less);
4448 __ movl(out, Immediate(-1));
4449
4450 __ Bind(&done);
4451 }
4452
4453 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
4454 LocationSummary* locations =
4455 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4456 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
4457 locations->SetInAt(i, Location::Any());
4458 }
4459 locations->SetOut(Location::Any());
4460 }
4461
4462 void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4463 LOG(FATAL) << "Unreachable";
4464 }
4465
4466 void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
4467 /*
4468 * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
4469 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
4470 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4471 */
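/*
 * Editorial note: in the switch below only kAnyAny and kNTStoreStore actually emit code.
 * MemoryFence() is expected to produce a full fence (an mfence or an equivalent locked
 * operation, depending on CPU features); kNTStoreStore needs one because non-temporal
 * stores are weakly ordered. The remaining kinds compile to nothing and rely on this switch
 * acting only as a compiler-level scheduling barrier.
 */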
4472 switch (kind) {
4473 case MemBarrierKind::kAnyAny: {
4474 MemoryFence();
4475 break;
4476 }
4477 case MemBarrierKind::kAnyStore:
4478 case MemBarrierKind::kLoadAny:
4479 case MemBarrierKind::kStoreStore: {
4480 // nop
4481 break;
4482 }
4483 case MemBarrierKind::kNTStoreStore:
4484 // Non-Temporal Store/Store needs an explicit fence.
4485 MemoryFence(/* non-temporal */ true);
4486 break;
4487 }
4488 }
4489
4490 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
4491 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
4492 HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
4493 return desired_dispatch_info;
4494 }
4495
4496 Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
4497 Register temp) {
4498 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
4499 Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4500 if (!invoke->GetLocations()->Intrinsified()) {
4501 return location.AsRegister<Register>();
4502 }
4503 // For intrinsics we allow any location, so it may be on the stack.
4504 if (!location.IsRegister()) {
4505 __ movl(temp, Address(ESP, location.GetStackIndex()));
4506 return temp;
4507 }
4508 // For register locations, check if the register was saved. If so, get it from the stack.
4509 // Note: There is a chance that the register was saved but not overwritten, so we could
4510 // save one load. However, since this is just an intrinsic slow path we prefer this
4511 // simple and more robust approach rather than trying to determine if that's the case.
4512 SlowPathCode* slow_path = GetCurrentSlowPath();
4513 if (slow_path != nullptr) {
4514 if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
4515 int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
4516 __ movl(temp, Address(ESP, stack_offset));
4517 return temp;
4518 }
4519 }
4520 return location.AsRegister<Register>();
4521 }
4522
4523 Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
4524 Location temp) {
4525 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
4526 switch (invoke->GetMethodLoadKind()) {
4527 case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
4528 // temp = thread->string_init_entrypoint
4529 uint32_t offset =
4530 GetThreadOffset<kX86PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
4531 __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(offset));
4532 break;
4533 }
4534 case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
4535 callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4536 break;
4537 case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
4538 __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
4539 break;
4540 case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
4541 Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
4542 temp.AsRegister<Register>());
4543 __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
4544 // Bind a new fixup label at the end of the "movl" insn.
4545 uint32_t offset = invoke->GetDexCacheArrayOffset();
4546 __ Bind(NewPcRelativeDexCacheArrayPatch(
4547 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(),
4548 invoke->GetDexFileForPcRelativeDexCache(),
4549 offset));
4550 break;
4551 }
4552 case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
4553 Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4554 Register method_reg;
4555 Register reg = temp.AsRegister<Register>();
4556 if (current_method.IsRegister()) {
4557 method_reg = current_method.AsRegister<Register>();
4558 } else {
4559 DCHECK(invoke->GetLocations()->Intrinsified());
4560 DCHECK(!current_method.IsValid());
4561 method_reg = reg;
4562 __ movl(reg, Address(ESP, kCurrentMethodStackOffset));
4563 }
4564 // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
4565 __ movl(reg, Address(method_reg,
4566 ArtMethod::DexCacheResolvedMethodsOffset(kX86PointerSize).Int32Value()));
4567 // temp = temp[index_in_cache];
4568 // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
4569 uint32_t index_in_cache = invoke->GetDexMethodIndex();
4570 __ movl(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache)));
4571 break;
4572 }
4573 }
4574 return callee_method;
4575 }
4576
4577 void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
4578 Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
4579
4580 switch (invoke->GetCodePtrLocation()) {
4581 case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
4582 __ call(GetFrameEntryLabel());
4583 break;
4584 case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
4585 // (callee_method + offset_of_quick_compiled_code)()
4586 __ call(Address(callee_method.AsRegister<Register>(),
4587 ArtMethod::EntryPointFromQuickCompiledCodeOffset(
4588 kX86PointerSize).Int32Value()));
4589 break;
4590 }
4591
4592 DCHECK(!IsLeafMethod());
4593 }
4594
4595 void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
4596 Register temp = temp_in.AsRegister<Register>();
4597 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4598 invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
4599
4600 // Use the calling convention instead of the location of the receiver, as
4601 // intrinsics may have put the receiver in a different register. In the intrinsics
4602 // slow path, the arguments have been moved to the right place, so here we are
4603 // guaranteed that the receiver is the first register of the calling convention.
4604 InvokeDexCallingConvention calling_convention;
4605 Register receiver = calling_convention.GetRegisterAt(0);
4606 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4607 // /* HeapReference<Class> */ temp = receiver->klass_
4608 __ movl(temp, Address(receiver, class_offset));
4609 MaybeRecordImplicitNullCheck(invoke);
4610 // Instead of simply (possibly) unpoisoning `temp` here, we should
4611 // emit a read barrier for the previous class reference load.
4612 // However this is not required in practice, as this is an
4613 // intermediate/temporary reference and because the current
4614 // concurrent copying collector keeps the from-space memory
4615 // intact/accessible until the end of the marking phase (the
4616 // concurrent copying collector may not keep it accessible in the future).
4617 __ MaybeUnpoisonHeapReference(temp);
4618 // temp = temp->GetMethodAt(method_offset);
4619 __ movl(temp, Address(temp, method_offset));
4620 // call temp->GetEntryPoint();
4621 __ call(Address(
4622 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
4623 }
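// Editorial sketch of the virtual dispatch above (mirrors the emitted loads, not extra code):
//   temp = receiver->klass_;                                  // may need unpoisoning
//   temp = temp->embedded_vtable_[invoke->GetVTableIndex()];  // ArtMethod*
//   call temp->entry_point_from_quick_compiled_code_;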
4624
4625 void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) {
4626 DCHECK(GetCompilerOptions().IsBootImage());
4627 HX86ComputeBaseMethodAddress* address = nullptr;
4628 if (GetCompilerOptions().GetCompilePic()) {
4629 address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
4630 } else {
4631 DCHECK_EQ(load_string->InputCount(), 0u);
4632 }
4633 string_patches_.emplace_back(address,
4634 load_string->GetDexFile(),
4635 load_string->GetStringIndex().index_);
4636 __ Bind(&string_patches_.back().label);
4637 }
4638
4639 void CodeGeneratorX86::RecordBootTypePatch(HLoadClass* load_class) {
4640 HX86ComputeBaseMethodAddress* address = nullptr;
4641 if (GetCompilerOptions().GetCompilePic()) {
4642 address = load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
4643 } else {
4644 DCHECK_EQ(load_class->InputCount(), 0u);
4645 }
4646 boot_image_type_patches_.emplace_back(address,
4647 load_class->GetDexFile(),
4648 load_class->GetTypeIndex().index_);
4649 __ Bind(&boot_image_type_patches_.back().label);
4650 }
4651
4652 Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) {
4653 HX86ComputeBaseMethodAddress* address =
4654 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
4655 type_bss_entry_patches_.emplace_back(
4656 address, load_class->GetDexFile(), load_class->GetTypeIndex().index_);
4657 return &type_bss_entry_patches_.back().label;
4658 }
4659
4660 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
4661 DCHECK(!GetCompilerOptions().IsBootImage());
4662 HX86ComputeBaseMethodAddress* address =
4663 load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
4664 string_patches_.emplace_back(
4665 address, load_string->GetDexFile(), load_string->GetStringIndex().index_);
4666 return &string_patches_.back().label;
4667 }
4668
4669 Label* CodeGeneratorX86::NewPcRelativeDexCacheArrayPatch(
4670 HX86ComputeBaseMethodAddress* method_address,
4671 const DexFile& dex_file,
4672 uint32_t element_offset) {
4673 // Add the patch entry and bind its label at the end of the instruction.
4674 pc_relative_dex_cache_patches_.emplace_back(method_address, dex_file, element_offset);
4675 return &pc_relative_dex_cache_patches_.back().label;
4676 }
4677
4678 // The label points to the end of the "movl" or another instruction but the literal offset
4679 // for method patch needs to point to the embedded constant which occupies the last 4 bytes.
4680 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
4681
4682 template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
4683 inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
4684 const ArenaDeque<X86PcRelativePatchInfo>& infos,
4685 ArenaVector<LinkerPatch>* linker_patches) {
4686 for (const X86PcRelativePatchInfo& info : infos) {
4687 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
4688 linker_patches->push_back(Factory(
4689 literal_offset, &info.dex_file, GetMethodAddressOffset(info.method_address), info.index));
4690 }
4691 }
4692
4693 void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
4694 DCHECK(linker_patches->empty());
4695 size_t size =
4696 pc_relative_dex_cache_patches_.size() +
4697 string_patches_.size() +
4698 boot_image_type_patches_.size() +
4699 type_bss_entry_patches_.size();
4700 linker_patches->reserve(size);
4701 EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
4702 linker_patches);
4703 if (!GetCompilerOptions().IsBootImage()) {
4704 DCHECK(boot_image_type_patches_.empty());
4705 EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
4706 } else if (GetCompilerOptions().GetCompilePic()) {
4707 EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_,
4708 linker_patches);
4709 EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches);
4710 } else {
4711 for (const PatchInfo<Label>& info : boot_image_type_patches_) {
4712 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
4713 linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, &info.dex_file, info.index));
4714 }
4715 for (const PatchInfo<Label>& info : string_patches_) {
4716 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
4717 linker_patches->push_back(
4718 LinkerPatch::StringPatch(literal_offset, &info.dex_file, info.index));
4719 }
4720 }
4721 EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
4722 linker_patches);
4723 DCHECK_EQ(size, linker_patches->size());
4724 }
4725
4726 void CodeGeneratorX86::MarkGCCard(Register temp,
4727 Register card,
4728 Register object,
4729 Register value,
4730 bool value_can_be_null) {
4731 NearLabel is_null;
4732 if (value_can_be_null) {
4733 __ testl(value, value);
4734 __ j(kEqual, &is_null);
4735 }
4736 __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
4737 __ movl(temp, object);
4738 __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
4739 __ movb(Address(temp, card, TIMES_1, 0),
4740 X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
4741 if (value_can_be_null) {
4742 __ Bind(&is_null);
4743 }
4744 }
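// Editorial sketch of the card mark above (illustrative):
//   card_base = Thread::Current()->card_table_;             // loaded from fs:
//   card_base[object >> kCardShift] = LowByte(card_base);   // movb of the base's low byte
// Writing the low byte of the (biased) card-table base is the usual trick for producing a
// non-zero "dirty" value without materializing a separate constant.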
4745
4746 void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
4747 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4748
4749 bool object_field_get_with_read_barrier =
4750 kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
4751 LocationSummary* locations =
4752 new (GetGraph()->GetArena()) LocationSummary(instruction,
4753 kEmitCompilerReadBarrier ?
4754 LocationSummary::kCallOnSlowPath :
4755 LocationSummary::kNoCall);
4756 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
4757 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
4758 }
4759 locations->SetInAt(0, Location::RequiresRegister());
4760
4761 if (Primitive::IsFloatingPointType(instruction->GetType())) {
4762 locations->SetOut(Location::RequiresFpuRegister());
4763 } else {
4764 // The output overlaps in case of long: we don't want the low move
4765 // to overwrite the object's location. Likewise, in the case of
4766 // an object field get with read barriers enabled, we do not want
4767 // the move to overwrite the object's location, as we need it to emit
4768 // the read barrier.
4769 locations->SetOut(
4770 Location::RequiresRegister(),
4771 (object_field_get_with_read_barrier || instruction->GetType() == Primitive::kPrimLong) ?
4772 Location::kOutputOverlap :
4773 Location::kNoOutputOverlap);
4774 }
4775
4776 if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) {
4777 // Long values can be loaded atomically into an XMM using movsd.
4778 // So we use an XMM register as a temp to achieve atomicity (first
4779 // load the temp into the XMM and then copy the XMM into the
4780 // output, 32 bits at a time).
4781 locations->AddTemp(Location::RequiresFpuRegister());
4782 }
4783 }
4784
4785 void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
4786 const FieldInfo& field_info) {
4787 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4788
4789 LocationSummary* locations = instruction->GetLocations();
4790 Location base_loc = locations->InAt(0);
4791 Register base = base_loc.AsRegister<Register>();
4792 Location out = locations->Out();
4793 bool is_volatile = field_info.IsVolatile();
4794 Primitive::Type field_type = field_info.GetFieldType();
4795 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4796
4797 switch (field_type) {
4798 case Primitive::kPrimBoolean: {
4799 __ movzxb(out.AsRegister<Register>(), Address(base, offset));
4800 break;
4801 }
4802
4803 case Primitive::kPrimByte: {
4804 __ movsxb(out.AsRegister<Register>(), Address(base, offset));
4805 break;
4806 }
4807
4808 case Primitive::kPrimShort: {
4809 __ movsxw(out.AsRegister<Register>(), Address(base, offset));
4810 break;
4811 }
4812
4813 case Primitive::kPrimChar: {
4814 __ movzxw(out.AsRegister<Register>(), Address(base, offset));
4815 break;
4816 }
4817
4818 case Primitive::kPrimInt:
4819 __ movl(out.AsRegister<Register>(), Address(base, offset));
4820 break;
4821
4822 case Primitive::kPrimNot: {
4823 // /* HeapReference<Object> */ out = *(base + offset)
4824 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4825 // Note that a potential implicit null check is handled in this
4826 // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
4827 codegen_->GenerateFieldLoadWithBakerReadBarrier(
4828 instruction, out, base, offset, /* needs_null_check */ true);
4829 if (is_volatile) {
4830 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4831 }
4832 } else {
4833 __ movl(out.AsRegister<Register>(), Address(base, offset));
4834 codegen_->MaybeRecordImplicitNullCheck(instruction);
4835 if (is_volatile) {
4836 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4837 }
4838 // If read barriers are enabled, emit read barriers other than
4839 // Baker's using a slow path (and also unpoison the loaded
4840 // reference, if heap poisoning is enabled).
4841 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
4842 }
4843 break;
4844 }
4845
4846 case Primitive::kPrimLong: {
4847 if (is_volatile) {
4848 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4849 __ movsd(temp, Address(base, offset));
4850 codegen_->MaybeRecordImplicitNullCheck(instruction);
4851 __ movd(out.AsRegisterPairLow<Register>(), temp);
4852 __ psrlq(temp, Immediate(32));
4853 __ movd(out.AsRegisterPairHigh<Register>(), temp);
4854 } else {
4855 DCHECK_NE(base, out.AsRegisterPairLow<Register>());
4856 __ movl(out.AsRegisterPairLow<Register>(), Address(base, offset));
4857 codegen_->MaybeRecordImplicitNullCheck(instruction);
4858 __ movl(out.AsRegisterPairHigh<Register>(), Address(base, kX86WordSize + offset));
4859 }
4860 break;
4861 }
4862
4863 case Primitive::kPrimFloat: {
4864 __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4865 break;
4866 }
4867
4868 case Primitive::kPrimDouble: {
4869 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4870 break;
4871 }
4872
4873 case Primitive::kPrimVoid:
4874 LOG(FATAL) << "Unreachable type " << field_type;
4875 UNREACHABLE();
4876 }
4877
4878 if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimLong) {
4879 // Potential implicit null checks, in the case of reference or
4880 // long fields, are handled in the previous switch statement.
4881 } else {
4882 codegen_->MaybeRecordImplicitNullCheck(instruction);
4883 }
4884
4885 if (is_volatile) {
4886 if (field_type == Primitive::kPrimNot) {
4887 // Memory barriers, in the case of references, are also handled
4888 // in the previous switch statement.
4889 } else {
4890 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4891 }
4892 }
4893 }
4894
4895 void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) {
4896 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4897
4898 LocationSummary* locations =
4899 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4900 locations->SetInAt(0, Location::RequiresRegister());
4901 bool is_volatile = field_info.IsVolatile();
4902 Primitive::Type field_type = field_info.GetFieldType();
4903 bool is_byte_type = (field_type == Primitive::kPrimBoolean)
4904 || (field_type == Primitive::kPrimByte);
4905
4906 // The register allocator does not support multiple
4907 // inputs that die at entry with one in a specific register.
4908 if (is_byte_type) {
4909 // Ensure the value is in a byte register.
4910 locations->SetInAt(1, Location::RegisterLocation(EAX));
4911 } else if (Primitive::IsFloatingPointType(field_type)) {
4912 if (is_volatile && field_type == Primitive::kPrimDouble) {
4913 // In order to satisfy the semantics of volatile, this must be a single instruction store.
4914 locations->SetInAt(1, Location::RequiresFpuRegister());
4915 } else {
4916 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
4917 }
4918 } else if (is_volatile && field_type == Primitive::kPrimLong) {
4919 // In order to satisfy the semantics of volatile, this must be a single instruction store.
4920 locations->SetInAt(1, Location::RequiresRegister());
4921
4922 // A 64-bit value can be atomically written to an address with movsd and an XMM register.
4923 // We need two XMM registers because there's no easier way to (bit) copy a register pair
4924 // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
4925 // NB: We could make the register allocator understand fp_reg <-> core_reg moves but given the
4926 // isolated cases when we need this it isn't worth adding the extra complexity.
4927 locations->AddTemp(Location::RequiresFpuRegister());
4928 locations->AddTemp(Location::RequiresFpuRegister());
4929 } else {
4930 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4931
4932 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
4933 // Temporary registers for the write barrier.
4934 locations->AddTemp(Location::RequiresRegister()); // May be used for reference poisoning too.
4935 // Ensure the card is in a byte register.
4936 locations->AddTemp(Location::RegisterLocation(ECX));
4937 }
4938 }
4939 }
4940
4941 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
4942 const FieldInfo& field_info,
4943 bool value_can_be_null) {
4944 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4945
4946 LocationSummary* locations = instruction->GetLocations();
4947 Register base = locations->InAt(0).AsRegister<Register>();
4948 Location value = locations->InAt(1);
4949 bool is_volatile = field_info.IsVolatile();
4950 Primitive::Type field_type = field_info.GetFieldType();
4951 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4952 bool needs_write_barrier =
4953 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
4954
4955 if (is_volatile) {
4956 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4957 }
4958
4959 bool maybe_record_implicit_null_check_done = false;
4960
4961 switch (field_type) {
4962 case Primitive::kPrimBoolean:
4963 case Primitive::kPrimByte: {
4964 __ movb(Address(base, offset), value.AsRegister<ByteRegister>());
4965 break;
4966 }
4967
4968 case Primitive::kPrimShort:
4969 case Primitive::kPrimChar: {
4970 if (value.IsConstant()) {
4971 int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4972 __ movw(Address(base, offset), Immediate(v));
4973 } else {
4974 __ movw(Address(base, offset), value.AsRegister<Register>());
4975 }
4976 break;
4977 }
4978
4979 case Primitive::kPrimInt:
4980 case Primitive::kPrimNot: {
4981 if (kPoisonHeapReferences && needs_write_barrier) {
4982 // Note that in the case where `value` is a null reference,
4983 // we do not enter this block, as the reference does not
4984 // need poisoning.
4985 DCHECK_EQ(field_type, Primitive::kPrimNot);
4986 Register temp = locations->GetTemp(0).AsRegister<Register>();
4987 __ movl(temp, value.AsRegister<Register>());
4988 __ PoisonHeapReference(temp);
4989 __ movl(Address(base, offset), temp);
4990 } else if (value.IsConstant()) {
4991 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4992 __ movl(Address(base, offset), Immediate(v));
4993 } else {
4994 DCHECK(value.IsRegister()) << value;
4995 __ movl(Address(base, offset), value.AsRegister<Register>());
4996 }
4997 break;
4998 }
4999
5000 case Primitive::kPrimLong: {
5001 if (is_volatile) {
5002 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
5003 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
5004 __ movd(temp1, value.AsRegisterPairLow<Register>());
5005 __ movd(temp2, value.AsRegisterPairHigh<Register>());
5006 __ punpckldq(temp1, temp2);
5007 __ movsd(Address(base, offset), temp1);
5008 codegen_->MaybeRecordImplicitNullCheck(instruction);
5009 } else if (value.IsConstant()) {
5010 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5011 __ movl(Address(base, offset), Immediate(Low32Bits(v)));
5012 codegen_->MaybeRecordImplicitNullCheck(instruction);
5013 __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
5014 } else {
5015 __ movl(Address(base, offset), value.AsRegisterPairLow<Register>());
5016 codegen_->MaybeRecordImplicitNullCheck(instruction);
5017 __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>());
5018 }
5019 maybe_record_implicit_null_check_done = true;
5020 break;
5021 }
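// Editorial sketch of the volatile 64-bit store above: the register pair is packed into a
// single XMM (punpckldq interleaves the two 32-bit lanes into hi:lo) so the field is written
// with one movsd and can never be observed torn by another thread.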
5022
5023 case Primitive::kPrimFloat: {
5024 if (value.IsConstant()) {
5025 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5026 __ movl(Address(base, offset), Immediate(v));
5027 } else {
5028 __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
5029 }
5030 break;
5031 }
5032
5033 case Primitive::kPrimDouble: {
5034 if (value.IsConstant()) {
5035 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5036 __ movl(Address(base, offset), Immediate(Low32Bits(v)));
5037 codegen_->MaybeRecordImplicitNullCheck(instruction);
5038 __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
5039 maybe_record_implicit_null_check_done = true;
5040 } else {
5041 __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
5042 }
5043 break;
5044 }
5045
5046 case Primitive::kPrimVoid:
5047 LOG(FATAL) << "Unreachable type " << field_type;
5048 UNREACHABLE();
5049 }
5050
5051 if (!maybe_record_implicit_null_check_done) {
5052 codegen_->MaybeRecordImplicitNullCheck(instruction);
5053 }
5054
5055 if (needs_write_barrier) {
5056 Register temp = locations->GetTemp(0).AsRegister<Register>();
5057 Register card = locations->GetTemp(1).AsRegister<Register>();
5058 codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>(), value_can_be_null);
5059 }
5060
5061 if (is_volatile) {
5062 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5063 }
5064 }
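// Editorial note: for volatile fields the barriers above follow the usual x86 pattern: a
// kAnyStore barrier (a no-op here) before the store and a kAnyAny (full) barrier after it.
// The GC card is marked only when a reference that needs a write barrier was stored.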
5065
5066 void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5067 HandleFieldGet(instruction, instruction->GetFieldInfo());
5068 }
5069
5070 void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5071 HandleFieldGet(instruction, instruction->GetFieldInfo());
5072 }
5073
5074 void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5075 HandleFieldSet(instruction, instruction->GetFieldInfo());
5076 }
5077
5078 void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5079 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5080 }
5081
5082 void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5083 HandleFieldSet(instruction, instruction->GetFieldInfo());
5084 }
5085
5086 void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5087 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5088 }
5089
5090 void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5091 HandleFieldGet(instruction, instruction->GetFieldInfo());
5092 }
5093
5094 void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5095 HandleFieldGet(instruction, instruction->GetFieldInfo());
5096 }
5097
5098 void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet(
5099 HUnresolvedInstanceFieldGet* instruction) {
5100 FieldAccessCallingConventionX86 calling_convention;
5101 codegen_->CreateUnresolvedFieldLocationSummary(
5102 instruction, instruction->GetFieldType(), calling_convention);
5103 }
5104
5105 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldGet(
5106 HUnresolvedInstanceFieldGet* instruction) {
5107 FieldAccessCallingConventionX86 calling_convention;
5108 codegen_->GenerateUnresolvedFieldAccess(instruction,
5109 instruction->GetFieldType(),
5110 instruction->GetFieldIndex(),
5111 instruction->GetDexPc(),
5112 calling_convention);
5113 }
5114
5115 void LocationsBuilderX86::VisitUnresolvedInstanceFieldSet(
5116 HUnresolvedInstanceFieldSet* instruction) {
5117 FieldAccessCallingConventionX86 calling_convention;
5118 codegen_->CreateUnresolvedFieldLocationSummary(
5119 instruction, instruction->GetFieldType(), calling_convention);
5120 }
5121
5122 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldSet(
5123 HUnresolvedInstanceFieldSet* instruction) {
5124 FieldAccessCallingConventionX86 calling_convention;
5125 codegen_->GenerateUnresolvedFieldAccess(instruction,
5126 instruction->GetFieldType(),
5127 instruction->GetFieldIndex(),
5128 instruction->GetDexPc(),
5129 calling_convention);
5130 }
5131
5132 void LocationsBuilderX86::VisitUnresolvedStaticFieldGet(
5133 HUnresolvedStaticFieldGet* instruction) {
5134 FieldAccessCallingConventionX86 calling_convention;
5135 codegen_->CreateUnresolvedFieldLocationSummary(
5136 instruction, instruction->GetFieldType(), calling_convention);
5137 }
5138
5139 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldGet(
5140 HUnresolvedStaticFieldGet* instruction) {
5141 FieldAccessCallingConventionX86 calling_convention;
5142 codegen_->GenerateUnresolvedFieldAccess(instruction,
5143 instruction->GetFieldType(),
5144 instruction->GetFieldIndex(),
5145 instruction->GetDexPc(),
5146 calling_convention);
5147 }
5148
5149 void LocationsBuilderX86::VisitUnresolvedStaticFieldSet(
5150 HUnresolvedStaticFieldSet* instruction) {
5151 FieldAccessCallingConventionX86 calling_convention;
5152 codegen_->CreateUnresolvedFieldLocationSummary(
5153 instruction, instruction->GetFieldType(), calling_convention);
5154 }
5155
5156 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet(
5157 HUnresolvedStaticFieldSet* instruction) {
5158 FieldAccessCallingConventionX86 calling_convention;
5159 codegen_->GenerateUnresolvedFieldAccess(instruction,
5160 instruction->GetFieldType(),
5161 instruction->GetFieldIndex(),
5162 instruction->GetDexPc(),
5163 calling_convention);
5164 }
5165
5166 void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
5167 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5168 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
5169 ? Location::RequiresRegister()
5170 : Location::Any();
5171 locations->SetInAt(0, loc);
5172 }
5173
5174 void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) {
5175 if (CanMoveNullCheckToUser(instruction)) {
5176 return;
5177 }
5178 LocationSummary* locations = instruction->GetLocations();
5179 Location obj = locations->InAt(0);
5180
5181 __ testl(EAX, Address(obj.AsRegister<Register>(), 0));
5182 RecordPcInfo(instruction, instruction->GetDexPc());
5183 }
5184
5185 void CodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) {
5186 SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86(instruction);
5187 AddSlowPath(slow_path);
5188
5189 LocationSummary* locations = instruction->GetLocations();
5190 Location obj = locations->InAt(0);
5191
5192 if (obj.IsRegister()) {
5193 __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>());
5194 } else if (obj.IsStackSlot()) {
5195 __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
5196 } else {
5197 DCHECK(obj.IsConstant()) << obj;
5198 DCHECK(obj.GetConstant()->IsNullConstant());
5199 __ jmp(slow_path->GetEntryLabel());
5200 return;
5201 }
5202 __ j(kEqual, slow_path->GetEntryLabel());
5203 }
5204
5205 void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
5206 codegen_->GenerateNullCheck(instruction);
5207 }
5208
5209 void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
5210 bool object_array_get_with_read_barrier =
5211 kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
5212 LocationSummary* locations =
5213 new (GetGraph()->GetArena()) LocationSummary(instruction,
5214 object_array_get_with_read_barrier ?
5215 LocationSummary::kCallOnSlowPath :
5216 LocationSummary::kNoCall);
5217 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5218 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5219 }
5220 locations->SetInAt(0, Location::RequiresRegister());
5221 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5222 if (Primitive::IsFloatingPointType(instruction->GetType())) {
5223 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5224 } else {
5225 // The output overlaps in case of long: we don't want the low move
5226 // to overwrite the array's location. Likewise, in the case of an
5227 // object array get with read barriers enabled, we do not want the
5228 // move to overwrite the array's location, as we need it to emit
5229 // the read barrier.
5230 locations->SetOut(
5231 Location::RequiresRegister(),
5232 (instruction->GetType() == Primitive::kPrimLong || object_array_get_with_read_barrier) ?
5233 Location::kOutputOverlap :
5234 Location::kNoOutputOverlap);
5235 }
5236 }
5237
5238 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
5239 LocationSummary* locations = instruction->GetLocations();
5240 Location obj_loc = locations->InAt(0);
5241 Register obj = obj_loc.AsRegister<Register>();
5242 Location index = locations->InAt(1);
5243 Location out_loc = locations->Out();
5244 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5245
5246 Primitive::Type type = instruction->GetType();
5247 switch (type) {
5248 case Primitive::kPrimBoolean: {
5249 Register out = out_loc.AsRegister<Register>();
5250 __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
5251 break;
5252 }
5253
5254 case Primitive::kPrimByte: {
5255 Register out = out_loc.AsRegister<Register>();
5256 __ movsxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
5257 break;
5258 }
5259
5260 case Primitive::kPrimShort: {
5261 Register out = out_loc.AsRegister<Register>();
5262 __ movsxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
5263 break;
5264 }
5265
5266 case Primitive::kPrimChar: {
5267 Register out = out_loc.AsRegister<Register>();
5268 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5269 // Branch cases into compressed and uncompressed for each index's type.
5270 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5271 NearLabel done, not_compressed;
5272 __ testb(Address(obj, count_offset), Immediate(1));
5273 codegen_->MaybeRecordImplicitNullCheck(instruction);
5274 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5275 "Expecting 0=compressed, 1=uncompressed");
5276 __ j(kNotZero, &not_compressed);
5277 __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
5278 __ jmp(&done);
5279 __ Bind(&not_compressed);
5280 __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
5281 __ Bind(&done);
5282 } else {
5283 // Common case: charAt() on an array of char, or when the string compression
5284 // feature is turned off.
5285 __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
5286 }
5287 break;
5288 }
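// Editorial note on the compressed-string path above: the low bit of String::count_ is the
// compression flag (0 = compressed 8-bit chars, 1 = uncompressed 16-bit chars), so charAt()
// selects a movzxb or movzxw load accordingly; the character count itself lives in the
// remaining bits (see VisitArrayLength below, which shifts it right by one).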
5289
5290 case Primitive::kPrimInt: {
5291 Register out = out_loc.AsRegister<Register>();
5292 __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
5293 break;
5294 }
5295
5296 case Primitive::kPrimNot: {
5297 static_assert(
5298 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5299 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5300 // /* HeapReference<Object> */ out =
5301 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
5302 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5303 // Note that a potential implicit null check is handled in this
5304 // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
5305 codegen_->GenerateArrayLoadWithBakerReadBarrier(
5306 instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
5307 } else {
5308 Register out = out_loc.AsRegister<Register>();
5309 __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
5310 codegen_->MaybeRecordImplicitNullCheck(instruction);
5311 // If read barriers are enabled, emit read barriers other than
5312 // Baker's using a slow path (and also unpoison the loaded
5313 // reference, if heap poisoning is enabled).
5314 if (index.IsConstant()) {
5315 uint32_t offset =
5316 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5317 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5318 } else {
5319 codegen_->MaybeGenerateReadBarrierSlow(
5320 instruction, out_loc, out_loc, obj_loc, data_offset, index);
5321 }
5322 }
5323 break;
5324 }
5325
5326 case Primitive::kPrimLong: {
5327 DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>());
5328 __ movl(out_loc.AsRegisterPairLow<Register>(),
5329 CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
5330 codegen_->MaybeRecordImplicitNullCheck(instruction);
5331 __ movl(out_loc.AsRegisterPairHigh<Register>(),
5332 CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset + kX86WordSize));
5333 break;
5334 }
5335
5336 case Primitive::kPrimFloat: {
5337 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5338 __ movss(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
5339 break;
5340 }
5341
5342 case Primitive::kPrimDouble: {
5343 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5344 __ movsd(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
5345 break;
5346 }
5347
5348 case Primitive::kPrimVoid:
5349 LOG(FATAL) << "Unreachable type " << type;
5350 UNREACHABLE();
5351 }
5352
5353 if (type == Primitive::kPrimNot || type == Primitive::kPrimLong) {
5354 // Potential implicit null checks, in the case of reference or
5355 // long arrays, are handled in the previous switch statement.
5356 } else {
5357 codegen_->MaybeRecordImplicitNullCheck(instruction);
5358 }
5359 }
5360
5361 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
5362 Primitive::Type value_type = instruction->GetComponentType();
5363
5364 bool needs_write_barrier =
5365 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5366 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
5367
5368 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
5369 instruction,
5370 may_need_runtime_call_for_type_check ?
5371 LocationSummary::kCallOnSlowPath :
5372 LocationSummary::kNoCall);
5373
5374 bool is_byte_type = (value_type == Primitive::kPrimBoolean)
5375 || (value_type == Primitive::kPrimByte);
5376 // We need the inputs to be different than the output in case of long operation.
5377 // In case of a byte operation, the register allocator does not support multiple
5378 // inputs that die at entry with one in a specific register.
5379 locations->SetInAt(0, Location::RequiresRegister());
5380 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5381 if (is_byte_type) {
5382 // Ensure the value is in a byte register.
5383 locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
5384 } else if (Primitive::IsFloatingPointType(value_type)) {
5385 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
5386 } else {
5387 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
5388 }
5389 if (needs_write_barrier) {
5390 // Temporary registers for the write barrier.
5391 locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
5392 // Ensure the card is in a byte register.
5393 locations->AddTemp(Location::RegisterLocation(ECX));
5394 }
5395 }
5396
5397 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
5398 LocationSummary* locations = instruction->GetLocations();
5399 Location array_loc = locations->InAt(0);
5400 Register array = array_loc.AsRegister<Register>();
5401 Location index = locations->InAt(1);
5402 Location value = locations->InAt(2);
5403 Primitive::Type value_type = instruction->GetComponentType();
5404 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5405 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5406 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5407 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
5408 bool needs_write_barrier =
5409 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5410
5411 switch (value_type) {
5412 case Primitive::kPrimBoolean:
5413 case Primitive::kPrimByte: {
5414 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
5415 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset);
5416 if (value.IsRegister()) {
5417 __ movb(address, value.AsRegister<ByteRegister>());
5418 } else {
5419 __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
5420 }
5421 codegen_->MaybeRecordImplicitNullCheck(instruction);
5422 break;
5423 }
5424
5425 case Primitive::kPrimShort:
5426 case Primitive::kPrimChar: {
5427 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
5428 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset);
5429 if (value.IsRegister()) {
5430 __ movw(address, value.AsRegister<Register>());
5431 } else {
5432 __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
5433 }
5434 codegen_->MaybeRecordImplicitNullCheck(instruction);
5435 break;
5436 }
5437
5438 case Primitive::kPrimNot: {
5439 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5440 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
5441
5442 if (!value.IsRegister()) {
5443 // Just setting null.
5444 DCHECK(instruction->InputAt(2)->IsNullConstant());
5445 DCHECK(value.IsConstant()) << value;
5446 __ movl(address, Immediate(0));
5447 codegen_->MaybeRecordImplicitNullCheck(instruction);
5448 DCHECK(!needs_write_barrier);
5449 DCHECK(!may_need_runtime_call_for_type_check);
5450 break;
5451 }
5452
5453 DCHECK(needs_write_barrier);
5454 Register register_value = value.AsRegister<Register>();
5455 // We cannot use a NearLabel for `done`, as its range may be too
5456 // short when Baker read barriers are enabled.
5457 Label done;
5458 NearLabel not_null, do_put;
5459 SlowPathCode* slow_path = nullptr;
5460 Location temp_loc = locations->GetTemp(0);
5461 Register temp = temp_loc.AsRegister<Register>();
5462 if (may_need_runtime_call_for_type_check) {
5463 slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86(instruction);
5464 codegen_->AddSlowPath(slow_path);
5465 if (instruction->GetValueCanBeNull()) {
5466 __ testl(register_value, register_value);
5467 __ j(kNotEqual, &not_null);
5468 __ movl(address, Immediate(0));
5469 codegen_->MaybeRecordImplicitNullCheck(instruction);
5470 __ jmp(&done);
5471 __ Bind(&not_null);
5472 }
5473
5474 // Note that when Baker read barriers are enabled, the type
5475 // checks are performed without read barriers. This is fine,
5476 // even in the case where a class object is in the from-space
5477 // after the flip, as a comparison involving such a type would
5478 // not produce a false positive; it may of course produce a
5479 // false negative, in which case we would take the ArraySet
5480 // slow path.
5481
5482 // /* HeapReference<Class> */ temp = array->klass_
5483 __ movl(temp, Address(array, class_offset));
5484 codegen_->MaybeRecordImplicitNullCheck(instruction);
5485 __ MaybeUnpoisonHeapReference(temp);
5486
5487 // /* HeapReference<Class> */ temp = temp->component_type_
5488 __ movl(temp, Address(temp, component_offset));
5489 // If heap poisoning is enabled, no need to unpoison `temp`
5490 // nor the object reference in `register_value->klass`, as
5491 // we are comparing two poisoned references.
5492 __ cmpl(temp, Address(register_value, class_offset));
5493
5494 if (instruction->StaticTypeOfArrayIsObjectArray()) {
5495 __ j(kEqual, &do_put);
5496 // If heap poisoning is enabled, the `temp` reference has
5497 // not been unpoisoned yet; unpoison it now.
5498 __ MaybeUnpoisonHeapReference(temp);
5499
5500 // If heap poisoning is enabled, no need to unpoison the
5501 // heap reference loaded below, as it is only used for a
5502 // comparison with null.
5503 __ cmpl(Address(temp, super_offset), Immediate(0));
5504 __ j(kNotEqual, slow_path->GetEntryLabel());
5505 __ Bind(&do_put);
5506 } else {
5507 __ j(kNotEqual, slow_path->GetEntryLabel());
5508 }
5509 }
5510
5511 if (kPoisonHeapReferences) {
5512 __ movl(temp, register_value);
5513 __ PoisonHeapReference(temp);
5514 __ movl(address, temp);
5515 } else {
5516 __ movl(address, register_value);
5517 }
5518 if (!may_need_runtime_call_for_type_check) {
5519 codegen_->MaybeRecordImplicitNullCheck(instruction);
5520 }
5521
5522 Register card = locations->GetTemp(1).AsRegister<Register>();
5523 codegen_->MarkGCCard(
5524 temp, card, array, value.AsRegister<Register>(), instruction->GetValueCanBeNull());
5525 __ Bind(&done);
5526
5527 if (slow_path != nullptr) {
5528 __ Bind(slow_path->GetExitLabel());
5529 }
5530
5531 break;
5532 }
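// Editorial sketch of the kPrimNot store above when the type check cannot be statically
// elided (illustrative C, hypothetical variable names, not generated code):
//   if (value == null) { array[index] = null; goto done; }       // only if value can be null
//   component = array->klass_->component_type_;
//   if (component != value->klass_ &&
//       (!static_type_is_object_array || component->super_class_ != null)) {
//     goto array_set_slow_path;                                   // let the runtime re-check
//   }
//   array[index] = value;   // plus optional poisoning and the GC card mark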
5533
5534 case Primitive::kPrimInt: {
5535 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5536 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
5537 if (value.IsRegister()) {
5538 __ movl(address, value.AsRegister<Register>());
5539 } else {
5540 DCHECK(value.IsConstant()) << value;
5541 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5542 __ movl(address, Immediate(v));
5543 }
5544 codegen_->MaybeRecordImplicitNullCheck(instruction);
5545 break;
5546 }
5547
5548 case Primitive::kPrimLong: {
5549 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
5550 if (value.IsRegisterPair()) {
5551 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
5552 value.AsRegisterPairLow<Register>());
5553 codegen_->MaybeRecordImplicitNullCheck(instruction);
5554 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
5555 value.AsRegisterPairHigh<Register>());
5556 } else {
5557 DCHECK(value.IsConstant());
5558 int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
5559 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
5560 Immediate(Low32Bits(val)));
5561 codegen_->MaybeRecordImplicitNullCheck(instruction);
5562 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
5563 Immediate(High32Bits(val)));
5564 }
5565 break;
5566 }
5567
5568 case Primitive::kPrimFloat: {
5569 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
5570 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
5571 if (value.IsFpuRegister()) {
5572 __ movss(address, value.AsFpuRegister<XmmRegister>());
5573 } else {
5574 DCHECK(value.IsConstant());
5575 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
5576 __ movl(address, Immediate(v));
5577 }
5578 codegen_->MaybeRecordImplicitNullCheck(instruction);
5579 break;
5580 }
5581
5582 case Primitive::kPrimDouble: {
5583 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
5584 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset);
5585 if (value.IsFpuRegister()) {
5586 __ movsd(address, value.AsFpuRegister<XmmRegister>());
5587 } else {
5588 DCHECK(value.IsConstant());
5589 Address address_hi =
5590 CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset + kX86WordSize);
5591 int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
5592 __ movl(address, Immediate(Low32Bits(v)));
5593 codegen_->MaybeRecordImplicitNullCheck(instruction);
5594 __ movl(address_hi, Immediate(High32Bits(v)));
5595 }
5596 break;
5597 }
5598
5599 case Primitive::kPrimVoid:
5600 LOG(FATAL) << "Unreachable type " << instruction->GetType();
5601 UNREACHABLE();
5602 }
5603 }
5604
VisitArrayLength(HArrayLength * instruction)5605 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
5606 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
5607 locations->SetInAt(0, Location::RequiresRegister());
5608 if (!instruction->IsEmittedAtUseSite()) {
5609 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5610 }
5611 }
5612
VisitArrayLength(HArrayLength * instruction)5613 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
5614 if (instruction->IsEmittedAtUseSite()) {
5615 return;
5616 }
5617
5618 LocationSummary* locations = instruction->GetLocations();
5619 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
5620 Register obj = locations->InAt(0).AsRegister<Register>();
5621 Register out = locations->Out().AsRegister<Register>();
5622 __ movl(out, Address(obj, offset));
5623 codegen_->MaybeRecordImplicitNullCheck(instruction);
5624  // The String count field stores the length shifted left by one with the compression flag in
      // the least significant bit; shift the flag out to obtain the length.
5625 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
5626 __ shrl(out, Immediate(1));
5627 }
5628 }
5629
VisitBoundsCheck(HBoundsCheck * instruction)5630 void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
5631 RegisterSet caller_saves = RegisterSet::Empty();
5632 InvokeRuntimeCallingConvention calling_convention;
5633 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5634 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5635 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
5636 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5637 HInstruction* length = instruction->InputAt(1);
5638 if (!length->IsEmittedAtUseSite()) {
5639 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5640 }
5641  // Need a temporary register to read the array's length.
5642 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5643 locations->AddTemp(Location::RequiresRegister());
5644 }
5645 }
5646
VisitBoundsCheck(HBoundsCheck * instruction)5647 void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
5648 const bool is_string_compressed_char_at =
5649 mirror::kUseStringCompression && instruction->IsStringCharAt();
5650 LocationSummary* locations = instruction->GetLocations();
5651 Location index_loc = locations->InAt(0);
5652 Location length_loc = locations->InAt(1);
5653 SlowPathCode* slow_path =
5654 new (GetGraph()->GetArena()) BoundsCheckSlowPathX86(instruction);
5655
5656 if (length_loc.IsConstant()) {
5657 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
5658 if (index_loc.IsConstant()) {
5659      // BCE will remove the bounds check if we are guaranteed to pass.
5660 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5661 if (index < 0 || index >= length) {
5662 codegen_->AddSlowPath(slow_path);
5663 __ jmp(slow_path->GetEntryLabel());
5664 } else {
5665 // Some optimization after BCE may have generated this, and we should not
5666 // generate a bounds check if it is a valid range.
5667 }
5668 return;
5669 }
5670
5671 // We have to reverse the jump condition because the length is the constant.
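    // kAboveEqual is an unsigned comparison, so a negative index (a huge unsigned value) is also
    // sent to the slow path.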
5672 Register index_reg = index_loc.AsRegister<Register>();
5673 __ cmpl(index_reg, Immediate(length));
5674 codegen_->AddSlowPath(slow_path);
5675 __ j(kAboveEqual, slow_path->GetEntryLabel());
5676 } else {
5677 HInstruction* array_length = instruction->InputAt(1);
5678 if (array_length->IsEmittedAtUseSite()) {
5679 // Address the length field in the array.
5680 DCHECK(array_length->IsArrayLength());
5681 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
5682 Location array_loc = array_length->GetLocations()->InAt(0);
5683 Address array_len(array_loc.AsRegister<Register>(), len_offset);
5684 if (is_string_compressed_char_at) {
5685 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
5686 // the string compression flag) with the in-memory length and avoid the temporary.
5687 Register length_reg = locations->GetTemp(0).AsRegister<Register>();
5688 __ movl(length_reg, array_len);
5689 codegen_->MaybeRecordImplicitNullCheck(array_length);
5690 __ shrl(length_reg, Immediate(1));
5691 codegen_->GenerateIntCompare(length_reg, index_loc);
5692 } else {
5693 // Checking bounds for general case:
5694 // Array of char or string's array with feature compression off.
5695 if (index_loc.IsConstant()) {
5696 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5697 __ cmpl(array_len, Immediate(value));
5698 } else {
5699 __ cmpl(array_len, index_loc.AsRegister<Register>());
5700 }
5701 codegen_->MaybeRecordImplicitNullCheck(array_length);
5702 }
5703 } else {
5704 codegen_->GenerateIntCompare(length_loc, index_loc);
5705 }
5706 codegen_->AddSlowPath(slow_path);
5707 __ j(kBelowEqual, slow_path->GetEntryLabel());
5708 }
5709 }
5710
VisitParallelMove(HParallelMove * instruction ATTRIBUTE_UNUSED)5711 void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5712 LOG(FATAL) << "Unreachable";
5713 }
5714
VisitParallelMove(HParallelMove * instruction)5715 void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) {
5716 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5717 }
5718
VisitSuspendCheck(HSuspendCheck * instruction)5719 void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
5720 LocationSummary* locations =
5721 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
5722  // In the suspend check slow path, there are usually no caller-save registers at all.
5723 // If SIMD instructions are present, however, we force spilling all live SIMD
5724 // registers in full width (since the runtime only saves/restores lower part).
5725 locations->SetCustomSlowPathCallerSaves(
5726 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5727 }
5728
VisitSuspendCheck(HSuspendCheck * instruction)5729 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
5730 HBasicBlock* block = instruction->GetBlock();
5731 if (block->GetLoopInformation() != nullptr) {
5732 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5733 // The back edge will generate the suspend check.
5734 return;
5735 }
5736 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5737 // The goto will generate the suspend check.
5738 return;
5739 }
5740 GenerateSuspendCheck(instruction, nullptr);
5741 }
5742
GenerateSuspendCheck(HSuspendCheck * instruction,HBasicBlock * successor)5743 void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
5744 HBasicBlock* successor) {
5745 SuspendCheckSlowPathX86* slow_path =
5746 down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
5747 if (slow_path == nullptr) {
5748 slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor);
5749 instruction->SetSlowPath(slow_path);
5750 codegen_->AddSlowPath(slow_path);
5751 if (successor != nullptr) {
5752 DCHECK(successor->IsLoopHeader());
5753 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
5754 }
5755 } else {
5756 DCHECK_EQ(slow_path->GetSuccessor(), successor);
5757 }
5758
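  // Poll the 16-bit thread flags stored in the Thread object (reachable through the fs segment
  // register on x86); a pending suspend or checkpoint request makes them non-zero.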
5759 __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
5760 Immediate(0));
5761 if (successor == nullptr) {
5762 __ j(kNotEqual, slow_path->GetEntryLabel());
5763 __ Bind(slow_path->GetReturnLabel());
5764 } else {
5765 __ j(kEqual, codegen_->GetLabelOf(successor));
5766 __ jmp(slow_path->GetEntryLabel());
5767 }
5768 }
5769
GetAssembler() const5770 X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
5771 return codegen_->GetAssembler();
5772 }
5773
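// x86 has no memory-to-memory move, so a scratch core register is borrowed; if the scope had to
// spill (push) one, ESP-relative offsets are adjusted by one word to compensate.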
MoveMemoryToMemory32(int dst,int src)5774 void ParallelMoveResolverX86::MoveMemoryToMemory32(int dst, int src) {
5775 ScratchRegisterScope ensure_scratch(
5776 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
5777 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
5778 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
5779 __ movl(temp_reg, Address(ESP, src + stack_offset));
5780 __ movl(Address(ESP, dst + stack_offset), temp_reg);
5781 }
5782
MoveMemoryToMemory64(int dst,int src)5783 void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) {
5784 ScratchRegisterScope ensure_scratch(
5785 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
5786 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
5787 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
5788 __ movl(temp_reg, Address(ESP, src + stack_offset));
5789 __ movl(Address(ESP, dst + stack_offset), temp_reg);
5790 __ movl(temp_reg, Address(ESP, src + stack_offset + kX86WordSize));
5791 __ movl(Address(ESP, dst + stack_offset + kX86WordSize), temp_reg);
5792 }
5793
EmitMove(size_t index)5794 void ParallelMoveResolverX86::EmitMove(size_t index) {
5795 MoveOperands* move = moves_[index];
5796 Location source = move->GetSource();
5797 Location destination = move->GetDestination();
5798
5799 if (source.IsRegister()) {
5800 if (destination.IsRegister()) {
5801 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
5802 } else if (destination.IsFpuRegister()) {
5803 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
5804 } else {
5805 DCHECK(destination.IsStackSlot());
5806 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
5807 }
5808 } else if (source.IsRegisterPair()) {
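    // A 64-bit value in a register pair reaches an XMM register through memory: spill both halves
    // to a temporary stack slot and reload them with a single movsd.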
5809 size_t elem_size = Primitive::ComponentSize(Primitive::kPrimInt);
5810 // Create stack space for 2 elements.
5811 __ subl(ESP, Immediate(2 * elem_size));
5812 __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>());
5813 __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>());
5814 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
5815 // And remove the temporary stack space we allocated.
5816 __ addl(ESP, Immediate(2 * elem_size));
5817 } else if (source.IsFpuRegister()) {
5818 if (destination.IsRegister()) {
5819 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
5820 } else if (destination.IsFpuRegister()) {
5821 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5822 } else if (destination.IsRegisterPair()) {
5823 XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
5824 __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
5825 __ psrlq(src_reg, Immediate(32));
5826 __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
5827 } else if (destination.IsStackSlot()) {
5828 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
5829 } else if (destination.IsDoubleStackSlot()) {
5830 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
5831 } else {
5832 DCHECK(destination.IsSIMDStackSlot());
5833 __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
5834 }
5835 } else if (source.IsStackSlot()) {
5836 if (destination.IsRegister()) {
5837 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
5838 } else if (destination.IsFpuRegister()) {
5839 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
5840 } else {
5841 DCHECK(destination.IsStackSlot());
5842 MoveMemoryToMemory32(destination.GetStackIndex(), source.GetStackIndex());
5843 }
5844 } else if (source.IsDoubleStackSlot()) {
5845 if (destination.IsRegisterPair()) {
5846 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
5847 __ movl(destination.AsRegisterPairHigh<Register>(),
5848 Address(ESP, source.GetHighStackIndex(kX86WordSize)));
5849 } else if (destination.IsFpuRegister()) {
5850 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
5851 } else {
5852 DCHECK(destination.IsDoubleStackSlot()) << destination;
5853 MoveMemoryToMemory64(destination.GetStackIndex(), source.GetStackIndex());
5854 }
5855 } else if (source.IsSIMDStackSlot()) {
5856 DCHECK(destination.IsFpuRegister());
5857 __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
5858 } else if (source.IsConstant()) {
5859 HConstant* constant = source.GetConstant();
5860 if (constant->IsIntConstant() || constant->IsNullConstant()) {
5861 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
5862 if (destination.IsRegister()) {
5863 if (value == 0) {
5864 __ xorl(destination.AsRegister<Register>(), destination.AsRegister<Register>());
5865 } else {
5866 __ movl(destination.AsRegister<Register>(), Immediate(value));
5867 }
5868 } else {
5869 DCHECK(destination.IsStackSlot()) << destination;
5870 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
5871 }
5872 } else if (constant->IsFloatConstant()) {
5873 float fp_value = constant->AsFloatConstant()->GetValue();
5874 int32_t value = bit_cast<int32_t, float>(fp_value);
5875 Immediate imm(value);
5876 if (destination.IsFpuRegister()) {
5877 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5878 if (value == 0) {
5879 // Easy handling of 0.0.
5880 __ xorps(dest, dest);
5881 } else {
5882 ScratchRegisterScope ensure_scratch(
5883 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
5884 Register temp = static_cast<Register>(ensure_scratch.GetRegister());
5885 __ movl(temp, Immediate(value));
5886 __ movd(dest, temp);
5887 }
5888 } else {
5889 DCHECK(destination.IsStackSlot()) << destination;
5890 __ movl(Address(ESP, destination.GetStackIndex()), imm);
5891 }
5892 } else if (constant->IsLongConstant()) {
5893 int64_t value = constant->AsLongConstant()->GetValue();
5894 int32_t low_value = Low32Bits(value);
5895 int32_t high_value = High32Bits(value);
5896 Immediate low(low_value);
5897 Immediate high(high_value);
5898 if (destination.IsDoubleStackSlot()) {
5899 __ movl(Address(ESP, destination.GetStackIndex()), low);
5900 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
5901 } else {
5902 __ movl(destination.AsRegisterPairLow<Register>(), low);
5903 __ movl(destination.AsRegisterPairHigh<Register>(), high);
5904 }
5905 } else {
5906 DCHECK(constant->IsDoubleConstant());
5907 double dbl_value = constant->AsDoubleConstant()->GetValue();
5908 int64_t value = bit_cast<int64_t, double>(dbl_value);
5909 int32_t low_value = Low32Bits(value);
5910 int32_t high_value = High32Bits(value);
5911 Immediate low(low_value);
5912 Immediate high(high_value);
5913 if (destination.IsFpuRegister()) {
5914 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5915 if (value == 0) {
5916 // Easy handling of 0.0.
5917 __ xorpd(dest, dest);
5918 } else {
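        // Materialize the 64-bit constant through the stack: pushing the high half first leaves
        // the low half at ESP, giving the little-endian layout that movsd expects.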
5919 __ pushl(high);
5920 __ pushl(low);
5921 __ movsd(dest, Address(ESP, 0));
5922 __ addl(ESP, Immediate(8));
5923 }
5924 } else {
5925 DCHECK(destination.IsDoubleStackSlot()) << destination;
5926 __ movl(Address(ESP, destination.GetStackIndex()), low);
5927 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
5928 }
5929 }
5930 } else {
5931 LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source;
5932 }
5933 }
5934
Exchange(Register reg,int mem)5935 void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
5936 Register suggested_scratch = reg == EAX ? EBX : EAX;
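  // The scratch register must differ from `reg` itself, so block `reg` and suggest the other of
  // EAX/EBX to the scratch scope.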
5937 ScratchRegisterScope ensure_scratch(
5938 this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
5939
5940 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
5941 __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
5942 __ movl(Address(ESP, mem + stack_offset), reg);
5943 __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
5944 }
5945
Exchange32(XmmRegister reg,int mem)5946 void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
5947 ScratchRegisterScope ensure_scratch(
5948 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
5949
5950 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
5951 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
5952 __ movl(temp_reg, Address(ESP, mem + stack_offset));
5953 __ movss(Address(ESP, mem + stack_offset), reg);
5954 __ movd(reg, temp_reg);
5955 }
5956
Exchange(int mem1,int mem2)5957 void ParallelMoveResolverX86::Exchange(int mem1, int mem2) {
5958 ScratchRegisterScope ensure_scratch1(
5959 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
5960
5961 Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
5962 ScratchRegisterScope ensure_scratch2(
5963 this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
5964
5965 int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
5966 stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
5967 __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
5968 __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
5969 __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
5970 __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
5971 }
5972
EmitSwap(size_t index)5973 void ParallelMoveResolverX86::EmitSwap(size_t index) {
5974 MoveOperands* move = moves_[index];
5975 Location source = move->GetSource();
5976 Location destination = move->GetDestination();
5977
5978 if (source.IsRegister() && destination.IsRegister()) {
5979    // Use the XOR swap algorithm to avoid the serializing XCHG instruction or the use of a
    // temporary register.
5980 DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>());
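    // If the two registers aliased, the first XOR would zero the value; the DCHECK above rules
    // that out.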
5981 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
5982 __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>());
5983 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
5984 } else if (source.IsRegister() && destination.IsStackSlot()) {
5985 Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
5986 } else if (source.IsStackSlot() && destination.IsRegister()) {
5987 Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
5988 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
5989 Exchange(destination.GetStackIndex(), source.GetStackIndex());
5990 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
5991    // Use the XOR swap algorithm to avoid a temporary register.
5992 DCHECK_NE(source.reg(), destination.reg());
5993 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5994 __ xorpd(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
5995 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5996 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
5997 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5998 } else if (destination.IsFpuRegister() && source.IsStackSlot()) {
5999 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6000 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
6001 // Take advantage of the 16 bytes in the XMM register.
6002 XmmRegister reg = source.AsFpuRegister<XmmRegister>();
6003 Address stack(ESP, destination.GetStackIndex());
6004    // Load the double from the stack into the high 64 bits of the XMM register.
6005 __ movhpd(reg, stack);
6006
6007 // Store the low double into the destination.
6008 __ movsd(stack, reg);
6009
6010 // Move the high double to the low double.
6011 __ psrldq(reg, Immediate(8));
6012 } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) {
6013 // Take advantage of the 16 bytes in the XMM register.
6014 XmmRegister reg = destination.AsFpuRegister<XmmRegister>();
6015 Address stack(ESP, source.GetStackIndex());
6016    // Load the double from the stack into the high 64 bits of the XMM register.
6017 __ movhpd(reg, stack);
6018
6019 // Store the low double into the destination.
6020 __ movsd(stack, reg);
6021
6022 // Move the high double to the low double.
6023 __ psrldq(reg, Immediate(8));
6024 } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) {
6025 Exchange(destination.GetStackIndex(), source.GetStackIndex());
6026 Exchange(destination.GetHighStackIndex(kX86WordSize), source.GetHighStackIndex(kX86WordSize));
6027 } else {
6028 LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination;
6029 }
6030 }
6031
SpillScratch(int reg)6032 void ParallelMoveResolverX86::SpillScratch(int reg) {
6033 __ pushl(static_cast<Register>(reg));
6034 }
6035
RestoreScratch(int reg)6036 void ParallelMoveResolverX86::RestoreScratch(int reg) {
6037 __ popl(static_cast<Register>(reg));
6038 }
6039
GetSupportedLoadClassKind(HLoadClass::LoadKind desired_class_load_kind)6040 HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
6041 HLoadClass::LoadKind desired_class_load_kind) {
6042 switch (desired_class_load_kind) {
6043 case HLoadClass::LoadKind::kInvalid:
6044 LOG(FATAL) << "UNREACHABLE";
6045 UNREACHABLE();
6046 case HLoadClass::LoadKind::kReferrersClass:
6047 break;
6048 case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
6049 DCHECK(!GetCompilerOptions().GetCompilePic());
6050 break;
6051 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6052 DCHECK(GetCompilerOptions().GetCompilePic());
6053 FALLTHROUGH_INTENDED;
6054 case HLoadClass::LoadKind::kBssEntry:
6055 DCHECK(!Runtime::Current()->UseJitCompilation()); // Note: boot image is also non-JIT.
6056 break;
6057 case HLoadClass::LoadKind::kBootImageAddress:
6058 break;
6059 case HLoadClass::LoadKind::kJitTableAddress:
6060 DCHECK(Runtime::Current()->UseJitCompilation());
6061 break;
6062 case HLoadClass::LoadKind::kDexCacheViaMethod:
6063 break;
6064 }
6065 return desired_class_load_kind;
6066 }
6067
VisitLoadClass(HLoadClass * cls)6068 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
6069 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6070 if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
6071 InvokeRuntimeCallingConvention calling_convention;
6072 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
6073 cls,
6074 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
6075 Location::RegisterLocation(EAX));
6076 DCHECK_EQ(calling_convention.GetRegisterAt(0), EAX);
6077 return;
6078 }
6079 DCHECK(!cls->NeedsAccessCheck());
6080
6081 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
6082 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
6083 ? LocationSummary::kCallOnSlowPath
6084 : LocationSummary::kNoCall;
6085 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
6086 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
6087 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6088 }
6089
6090 if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
6091 load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
6092 load_kind == HLoadClass::LoadKind::kBssEntry) {
6093 locations->SetInAt(0, Location::RequiresRegister());
6094 }
6095 locations->SetOut(Location::RequiresRegister());
6096 if (load_kind == HLoadClass::LoadKind::kBssEntry) {
6097 if (!kUseReadBarrier || kUseBakerReadBarrier) {
6098 // Rely on the type resolution and/or initialization to save everything.
6099 RegisterSet caller_saves = RegisterSet::Empty();
6100 InvokeRuntimeCallingConvention calling_convention;
6101 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6102 locations->SetCustomSlowPathCallerSaves(caller_saves);
6103 } else {
6104 // For non-Baker read barrier we have a temp-clobbering call.
6105 }
6106 }
6107 }
6108
NewJitRootClassPatch(const DexFile & dex_file,dex::TypeIndex dex_index,Handle<mirror::Class> handle)6109 Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file,
6110 dex::TypeIndex dex_index,
6111 Handle<mirror::Class> handle) {
6112 jit_class_roots_.Overwrite(TypeReference(&dex_file, dex_index),
6113 reinterpret_cast64<uint64_t>(handle.GetReference()));
6114 // Add a patch entry and return the label.
6115 jit_class_patches_.emplace_back(dex_file, dex_index.index_);
6116 PatchInfo<Label>* info = &jit_class_patches_.back();
6117 return &info->label;
6118 }
6119
6120 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6121 // move.
VisitLoadClass(HLoadClass * cls)6122 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
6123 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6124 if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
6125 codegen_->GenerateLoadClassRuntimeCall(cls);
6126 return;
6127 }
6128 DCHECK(!cls->NeedsAccessCheck());
6129
6130 LocationSummary* locations = cls->GetLocations();
6131 Location out_loc = locations->Out();
6132 Register out = out_loc.AsRegister<Register>();
6133
6134 bool generate_null_check = false;
6135 const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
6136 ? kWithoutReadBarrier
6137 : kCompilerReadBarrierOption;
6138 switch (load_kind) {
6139 case HLoadClass::LoadKind::kReferrersClass: {
6140 DCHECK(!cls->CanCallRuntime());
6141 DCHECK(!cls->MustGenerateClinitCheck());
6142 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
6143 Register current_method = locations->InAt(0).AsRegister<Register>();
6144 GenerateGcRootFieldLoad(
6145 cls,
6146 out_loc,
6147 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
6148 /* fixup_label */ nullptr,
6149 read_barrier_option);
6150 break;
6151 }
6152 case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
6153 DCHECK(codegen_->GetCompilerOptions().IsBootImage());
6154 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6155 __ movl(out, Immediate(/* placeholder */ 0));
6156 codegen_->RecordBootTypePatch(cls);
6157 break;
6158 }
6159 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
6160 DCHECK(codegen_->GetCompilerOptions().IsBootImage());
6161 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6162 Register method_address = locations->InAt(0).AsRegister<Register>();
6163 __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
6164 codegen_->RecordBootTypePatch(cls);
6165 break;
6166 }
6167 case HLoadClass::LoadKind::kBootImageAddress: {
6168 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
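      // The class lives in the boot image, which is never moved by the GC, so its 32-bit address
      // can be embedded directly as an immediate.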
6169 uint32_t address = dchecked_integral_cast<uint32_t>(
6170 reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
6171 DCHECK_NE(address, 0u);
6172 __ movl(out, Immediate(address));
6173 break;
6174 }
6175 case HLoadClass::LoadKind::kBssEntry: {
6176 Register method_address = locations->InAt(0).AsRegister<Register>();
6177 Address address(method_address, CodeGeneratorX86::kDummy32BitOffset);
6178 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
6179 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6180 generate_null_check = true;
6181 break;
6182 }
6183 case HLoadClass::LoadKind::kJitTableAddress: {
6184 Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset);
6185 Label* fixup_label = codegen_->NewJitRootClassPatch(
6186 cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
6187 // /* GcRoot<mirror::Class> */ out = *address
6188 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6189 break;
6190 }
6191 case HLoadClass::LoadKind::kDexCacheViaMethod:
6192 case HLoadClass::LoadKind::kInvalid:
6193 LOG(FATAL) << "UNREACHABLE";
6194 UNREACHABLE();
6195 }
6196
6197 if (generate_null_check || cls->MustGenerateClinitCheck()) {
6198 DCHECK(cls->CanCallRuntime());
6199 SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
6200 cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
6201 codegen_->AddSlowPath(slow_path);
6202
6203 if (generate_null_check) {
6204 __ testl(out, out);
6205 __ j(kEqual, slow_path->GetEntryLabel());
6206 }
6207
6208 if (cls->MustGenerateClinitCheck()) {
6209 GenerateClassInitializationCheck(slow_path, out);
6210 } else {
6211 __ Bind(slow_path->GetExitLabel());
6212 }
6213 }
6214 }
6215
VisitClinitCheck(HClinitCheck * check)6216 void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) {
6217 LocationSummary* locations =
6218 new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6219 locations->SetInAt(0, Location::RequiresRegister());
6220 if (check->HasUses()) {
6221 locations->SetOut(Location::SameAsFirstInput());
6222 }
6223 }
6224
VisitClinitCheck(HClinitCheck * check)6225 void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
6226  // We assume the class is not null.
6227 SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
6228 check->GetLoadClass(), check, check->GetDexPc(), true);
6229 codegen_->AddSlowPath(slow_path);
6230 GenerateClassInitializationCheck(slow_path,
6231 check->GetLocations()->InAt(0).AsRegister<Register>());
6232 }
6233
GenerateClassInitializationCheck(SlowPathCode * slow_path,Register class_reg)6234 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
6235 SlowPathCode* slow_path, Register class_reg) {
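  // The class status is compared (signed) against kStatusInitialized; any smaller value (not yet
  // initialized, or an error status) takes the slow path, which initializes the class or throws.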
6236 __ cmpl(Address(class_reg, mirror::Class::StatusOffset().Int32Value()),
6237 Immediate(mirror::Class::kStatusInitialized));
6238 __ j(kLess, slow_path->GetEntryLabel());
6239 __ Bind(slow_path->GetExitLabel());
6240 // No need for memory fence, thanks to the X86 memory model.
6241 }
6242
GetSupportedLoadStringKind(HLoadString::LoadKind desired_string_load_kind)6243 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
6244 HLoadString::LoadKind desired_string_load_kind) {
6245 switch (desired_string_load_kind) {
6246 case HLoadString::LoadKind::kBootImageLinkTimeAddress:
6247 DCHECK(!GetCompilerOptions().GetCompilePic());
6248 break;
6249 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6250 DCHECK(GetCompilerOptions().GetCompilePic());
6251 FALLTHROUGH_INTENDED;
6252 case HLoadString::LoadKind::kBssEntry:
6253 DCHECK(!Runtime::Current()->UseJitCompilation()); // Note: boot image is also non-JIT.
6254 break;
6255 case HLoadString::LoadKind::kBootImageAddress:
6256 break;
6257 case HLoadString::LoadKind::kJitTableAddress:
6258 DCHECK(Runtime::Current()->UseJitCompilation());
6259 break;
6260 case HLoadString::LoadKind::kDexCacheViaMethod:
6261 break;
6262 }
6263 return desired_string_load_kind;
6264 }
6265
VisitLoadString(HLoadString * load)6266 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
6267 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
6268 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
6269 HLoadString::LoadKind load_kind = load->GetLoadKind();
6270 if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
6271 load_kind == HLoadString::LoadKind::kBssEntry) {
6272 locations->SetInAt(0, Location::RequiresRegister());
6273 }
6274 if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
6275 locations->SetOut(Location::RegisterLocation(EAX));
6276 } else {
6277 locations->SetOut(Location::RequiresRegister());
6278 if (load_kind == HLoadString::LoadKind::kBssEntry) {
6279 if (!kUseReadBarrier || kUseBakerReadBarrier) {
6280        // Rely on the pResolveString entrypoint to save everything.
6281 RegisterSet caller_saves = RegisterSet::Empty();
6282 InvokeRuntimeCallingConvention calling_convention;
6283 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6284 locations->SetCustomSlowPathCallerSaves(caller_saves);
6285 } else {
6286 // For non-Baker read barrier we have a temp-clobbering call.
6287 }
6288 }
6289 }
6290 }
6291
NewJitRootStringPatch(const DexFile & dex_file,dex::StringIndex dex_index,Handle<mirror::String> handle)6292 Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file,
6293 dex::StringIndex dex_index,
6294 Handle<mirror::String> handle) {
6295 jit_string_roots_.Overwrite(
6296 StringReference(&dex_file, dex_index), reinterpret_cast64<uint64_t>(handle.GetReference()));
6297 // Add a patch entry and return the label.
6298 jit_string_patches_.emplace_back(dex_file, dex_index.index_);
6299 PatchInfo<Label>* info = &jit_string_patches_.back();
6300 return &info->label;
6301 }
6302
6303 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6304 // move.
VisitLoadString(HLoadString * load)6305 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
6306 LocationSummary* locations = load->GetLocations();
6307 Location out_loc = locations->Out();
6308 Register out = out_loc.AsRegister<Register>();
6309
6310 switch (load->GetLoadKind()) {
6311 case HLoadString::LoadKind::kBootImageLinkTimeAddress: {
6312 DCHECK(codegen_->GetCompilerOptions().IsBootImage());
6313 __ movl(out, Immediate(/* placeholder */ 0));
6314 codegen_->RecordBootStringPatch(load);
6315 return; // No dex cache slow path.
6316 }
6317 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
6318 DCHECK(codegen_->GetCompilerOptions().IsBootImage());
6319 Register method_address = locations->InAt(0).AsRegister<Register>();
6320 __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
6321 codegen_->RecordBootStringPatch(load);
6322 return; // No dex cache slow path.
6323 }
6324 case HLoadString::LoadKind::kBootImageAddress: {
6325 uint32_t address = dchecked_integral_cast<uint32_t>(
6326 reinterpret_cast<uintptr_t>(load->GetString().Get()));
6327 DCHECK_NE(address, 0u);
6328 __ movl(out, Immediate(address));
6329 return; // No dex cache slow path.
6330 }
6331 case HLoadString::LoadKind::kBssEntry: {
6332 Register method_address = locations->InAt(0).AsRegister<Register>();
6333 Address address = Address(method_address, CodeGeneratorX86::kDummy32BitOffset);
6334 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
6335 // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
6336 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6337 SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
6338 codegen_->AddSlowPath(slow_path);
6339 __ testl(out, out);
6340 __ j(kEqual, slow_path->GetEntryLabel());
6341 __ Bind(slow_path->GetExitLabel());
6342 return;
6343 }
6344 case HLoadString::LoadKind::kJitTableAddress: {
6345 Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset);
6346 Label* fixup_label = codegen_->NewJitRootStringPatch(
6347 load->GetDexFile(), load->GetStringIndex(), load->GetString());
6348 // /* GcRoot<mirror::String> */ out = *address
6349 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6350 return;
6351 }
6352 default:
6353 break;
6354 }
6355
6356  // TODO: Re-add the compiler code that does the string dex cache lookup.
6357 InvokeRuntimeCallingConvention calling_convention;
6358 DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
6359 __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_));
6360 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
6361 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
6362 }
6363
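// The pending exception is a field of the Thread object; on x86 the current Thread is addressed
// through the fs segment register, hence the fs-relative absolute address.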
GetExceptionTlsAddress()6364 static Address GetExceptionTlsAddress() {
6365 return Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>().Int32Value());
6366 }
6367
VisitLoadException(HLoadException * load)6368 void LocationsBuilderX86::VisitLoadException(HLoadException* load) {
6369 LocationSummary* locations =
6370 new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
6371 locations->SetOut(Location::RequiresRegister());
6372 }
6373
VisitLoadException(HLoadException * load)6374 void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) {
6375 __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), GetExceptionTlsAddress());
6376 }
6377
VisitClearException(HClearException * clear)6378 void LocationsBuilderX86::VisitClearException(HClearException* clear) {
6379 new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall);
6380 }
6381
VisitClearException(HClearException * clear ATTRIBUTE_UNUSED)6382 void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
6383 __ fs()->movl(GetExceptionTlsAddress(), Immediate(0));
6384 }
6385
VisitThrow(HThrow * instruction)6386 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
6387 LocationSummary* locations =
6388 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
6389 InvokeRuntimeCallingConvention calling_convention;
6390 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6391 }
6392
VisitThrow(HThrow * instruction)6393 void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
6394 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
6395 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
6396 }
6397
6398 // Temp is used for read barrier.
NumberOfInstanceOfTemps(TypeCheckKind type_check_kind)6399 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
6400 if (kEmitCompilerReadBarrier &&
6401 !kUseBakerReadBarrier &&
6402 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
6403 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
6404 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
6405 return 1;
6406 }
6407 return 0;
6408 }
6409
6410 // Interface case has 3 temps, one for holding the number of interfaces, one for the current
6411 // interface pointer, one for loading the current interface.
6412 // The other checks have one temp for loading the object's class.
NumberOfCheckCastTemps(TypeCheckKind type_check_kind)6413 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
6414 if (type_check_kind == TypeCheckKind::kInterfaceCheck && !kPoisonHeapReferences) {
6415 return 2;
6416 }
6417 return 1 + NumberOfInstanceOfTemps(type_check_kind);
6418 }
6419
VisitInstanceOf(HInstanceOf * instruction)6420 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
6421 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
6422 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6423 bool baker_read_barrier_slow_path = false;
6424 switch (type_check_kind) {
6425 case TypeCheckKind::kExactCheck:
6426 case TypeCheckKind::kAbstractClassCheck:
6427 case TypeCheckKind::kClassHierarchyCheck:
6428 case TypeCheckKind::kArrayObjectCheck:
6429 call_kind =
6430 kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
6431 baker_read_barrier_slow_path = kUseBakerReadBarrier;
6432 break;
6433 case TypeCheckKind::kArrayCheck:
6434 case TypeCheckKind::kUnresolvedCheck:
6435 case TypeCheckKind::kInterfaceCheck:
6436 call_kind = LocationSummary::kCallOnSlowPath;
6437 break;
6438 }
6439
6440 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
6441 if (baker_read_barrier_slow_path) {
6442 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6443 }
6444 locations->SetInAt(0, Location::RequiresRegister());
6445 locations->SetInAt(1, Location::Any());
6446 // Note that TypeCheckSlowPathX86 uses this "out" register too.
6447 locations->SetOut(Location::RequiresRegister());
6448 // When read barriers are enabled, we need a temporary register for some cases.
6449 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
6450 }
6451
VisitInstanceOf(HInstanceOf * instruction)6452 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
6453 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6454 LocationSummary* locations = instruction->GetLocations();
6455 Location obj_loc = locations->InAt(0);
6456 Register obj = obj_loc.AsRegister<Register>();
6457 Location cls = locations->InAt(1);
6458 Location out_loc = locations->Out();
6459 Register out = out_loc.AsRegister<Register>();
6460 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
6461 DCHECK_LE(num_temps, 1u);
6462 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
6463 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6464 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6465 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6466 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6467 SlowPathCode* slow_path = nullptr;
6468 NearLabel done, zero;
6469
6470 // Return 0 if `obj` is null.
6471 // Avoid null check if we know obj is not null.
6472 if (instruction->MustDoNullCheck()) {
6473 __ testl(obj, obj);
6474 __ j(kEqual, &zero);
6475 }
6476
6477 switch (type_check_kind) {
6478 case TypeCheckKind::kExactCheck: {
6479 // /* HeapReference<Class> */ out = obj->klass_
6480 GenerateReferenceLoadTwoRegisters(instruction,
6481 out_loc,
6482 obj_loc,
6483 class_offset,
6484 kCompilerReadBarrierOption);
6485 if (cls.IsRegister()) {
6486 __ cmpl(out, cls.AsRegister<Register>());
6487 } else {
6488 DCHECK(cls.IsStackSlot()) << cls;
6489 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
6490 }
6491
6492 // Classes must be equal for the instanceof to succeed.
6493 __ j(kNotEqual, &zero);
6494 __ movl(out, Immediate(1));
6495 __ jmp(&done);
6496 break;
6497 }
6498
6499 case TypeCheckKind::kAbstractClassCheck: {
6500 // /* HeapReference<Class> */ out = obj->klass_
6501 GenerateReferenceLoadTwoRegisters(instruction,
6502 out_loc,
6503 obj_loc,
6504 class_offset,
6505 kCompilerReadBarrierOption);
6506 // If the class is abstract, we eagerly fetch the super class of the
6507 // object to avoid doing a comparison we know will fail.
6508 NearLabel loop;
6509 __ Bind(&loop);
6510 // /* HeapReference<Class> */ out = out->super_class_
6511 GenerateReferenceLoadOneRegister(instruction,
6512 out_loc,
6513 super_offset,
6514 maybe_temp_loc,
6515 kCompilerReadBarrierOption);
6516 __ testl(out, out);
6517 // If `out` is null, we use it for the result, and jump to `done`.
6518 __ j(kEqual, &done);
6519 if (cls.IsRegister()) {
6520 __ cmpl(out, cls.AsRegister<Register>());
6521 } else {
6522 DCHECK(cls.IsStackSlot()) << cls;
6523 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
6524 }
6525 __ j(kNotEqual, &loop);
6526 __ movl(out, Immediate(1));
6527 if (zero.IsLinked()) {
6528 __ jmp(&done);
6529 }
6530 break;
6531 }
6532
6533 case TypeCheckKind::kClassHierarchyCheck: {
6534 // /* HeapReference<Class> */ out = obj->klass_
6535 GenerateReferenceLoadTwoRegisters(instruction,
6536 out_loc,
6537 obj_loc,
6538 class_offset,
6539 kCompilerReadBarrierOption);
6540 // Walk over the class hierarchy to find a match.
6541 NearLabel loop, success;
6542 __ Bind(&loop);
6543 if (cls.IsRegister()) {
6544 __ cmpl(out, cls.AsRegister<Register>());
6545 } else {
6546 DCHECK(cls.IsStackSlot()) << cls;
6547 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
6548 }
6549 __ j(kEqual, &success);
6550 // /* HeapReference<Class> */ out = out->super_class_
6551 GenerateReferenceLoadOneRegister(instruction,
6552 out_loc,
6553 super_offset,
6554 maybe_temp_loc,
6555 kCompilerReadBarrierOption);
6556 __ testl(out, out);
6557 __ j(kNotEqual, &loop);
6558 // If `out` is null, we use it for the result, and jump to `done`.
6559 __ jmp(&done);
6560 __ Bind(&success);
6561 __ movl(out, Immediate(1));
6562 if (zero.IsLinked()) {
6563 __ jmp(&done);
6564 }
6565 break;
6566 }
6567
6568 case TypeCheckKind::kArrayObjectCheck: {
6569 // /* HeapReference<Class> */ out = obj->klass_
6570 GenerateReferenceLoadTwoRegisters(instruction,
6571 out_loc,
6572 obj_loc,
6573 class_offset,
6574 kCompilerReadBarrierOption);
6575 // Do an exact check.
6576 NearLabel exact_check;
6577 if (cls.IsRegister()) {
6578 __ cmpl(out, cls.AsRegister<Register>());
6579 } else {
6580 DCHECK(cls.IsStackSlot()) << cls;
6581 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
6582 }
6583 __ j(kEqual, &exact_check);
6584 // Otherwise, we need to check that the object's class is a non-primitive array.
6585 // /* HeapReference<Class> */ out = out->component_type_
6586 GenerateReferenceLoadOneRegister(instruction,
6587 out_loc,
6588 component_offset,
6589 maybe_temp_loc,
6590 kCompilerReadBarrierOption);
6591 __ testl(out, out);
6592 // If `out` is null, we use it for the result, and jump to `done`.
6593 __ j(kEqual, &done);
6594 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
6595 __ j(kNotEqual, &zero);
6596 __ Bind(&exact_check);
6597 __ movl(out, Immediate(1));
6598 __ jmp(&done);
6599 break;
6600 }
6601
6602 case TypeCheckKind::kArrayCheck: {
6603 // No read barrier since the slow path will retry upon failure.
6604 // /* HeapReference<Class> */ out = obj->klass_
6605 GenerateReferenceLoadTwoRegisters(instruction,
6606 out_loc,
6607 obj_loc,
6608 class_offset,
6609 kWithoutReadBarrier);
6610 if (cls.IsRegister()) {
6611 __ cmpl(out, cls.AsRegister<Register>());
6612 } else {
6613 DCHECK(cls.IsStackSlot()) << cls;
6614 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
6615 }
6616 DCHECK(locations->OnlyCallsOnSlowPath());
6617 slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
6618 /* is_fatal */ false);
6619 codegen_->AddSlowPath(slow_path);
6620 __ j(kNotEqual, slow_path->GetEntryLabel());
6621 __ movl(out, Immediate(1));
6622 if (zero.IsLinked()) {
6623 __ jmp(&done);
6624 }
6625 break;
6626 }
6627
6628 case TypeCheckKind::kUnresolvedCheck:
6629 case TypeCheckKind::kInterfaceCheck: {
6630 // Note that we indeed only call on slow path, but we always go
6631 // into the slow path for the unresolved and interface check
6632 // cases.
6633 //
6634 // We cannot directly call the InstanceofNonTrivial runtime
6635 // entry point without resorting to a type checking slow path
6636 // here (i.e. by calling InvokeRuntime directly), as it would
6637 // require to assign fixed registers for the inputs of this
6638 // HInstanceOf instruction (following the runtime calling
6639 // convention), which might be cluttered by the potential first
6640 // read barrier emission at the beginning of this method.
6641 //
6642 // TODO: Introduce a new runtime entry point taking the object
6643 // to test (instead of its class) as argument, and let it deal
6644 // with the read barrier issues. This will let us refactor this
6645 // case of the `switch` code as it was previously (with a direct
6646 // call to the runtime not using a type checking slow path).
6647 // This should also be beneficial for the other cases above.
6648 DCHECK(locations->OnlyCallsOnSlowPath());
6649 slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
6650 /* is_fatal */ false);
6651 codegen_->AddSlowPath(slow_path);
6652 __ jmp(slow_path->GetEntryLabel());
6653 if (zero.IsLinked()) {
6654 __ jmp(&done);
6655 }
6656 break;
6657 }
6658 }
6659
6660 if (zero.IsLinked()) {
6661 __ Bind(&zero);
6662 __ xorl(out, out);
6663 }
6664
6665 if (done.IsLinked()) {
6666 __ Bind(&done);
6667 }
6668
6669 if (slow_path != nullptr) {
6670 __ Bind(slow_path->GetExitLabel());
6671 }
6672 }
6673
IsTypeCheckSlowPathFatal(TypeCheckKind type_check_kind,bool throws_into_catch)6674 static bool IsTypeCheckSlowPathFatal(TypeCheckKind type_check_kind, bool throws_into_catch) {
6675 switch (type_check_kind) {
6676 case TypeCheckKind::kExactCheck:
6677 case TypeCheckKind::kAbstractClassCheck:
6678 case TypeCheckKind::kClassHierarchyCheck:
6679 case TypeCheckKind::kArrayObjectCheck:
6680 return !throws_into_catch && !kEmitCompilerReadBarrier;
6681 case TypeCheckKind::kInterfaceCheck:
6682 return !throws_into_catch && !kEmitCompilerReadBarrier && !kPoisonHeapReferences;
6683 case TypeCheckKind::kArrayCheck:
6684 case TypeCheckKind::kUnresolvedCheck:
6685 return false;
6686 }
6687 LOG(FATAL) << "Unreachable";
6688 UNREACHABLE();
6689 }
6690
VisitCheckCast(HCheckCast * instruction)6691 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
6692 bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
6693 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6694 LocationSummary::CallKind call_kind =
6695 IsTypeCheckSlowPathFatal(type_check_kind, throws_into_catch)
6696 ? LocationSummary::kNoCall
6697 : LocationSummary::kCallOnSlowPath;
6698 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
6699 locations->SetInAt(0, Location::RequiresRegister());
6700 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6701 // Require a register for the interface check since there is a loop that compares the class to
6702 // a memory address.
6703 locations->SetInAt(1, Location::RequiresRegister());
6704 } else {
6705 locations->SetInAt(1, Location::Any());
6706 }
6707 // Note that TypeCheckSlowPathX86 uses this "temp" register too.
6708 locations->AddTemp(Location::RequiresRegister());
6709 // When read barriers are enabled, we need an additional temporary register for some cases.
6710 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
6711 }
6712
VisitCheckCast(HCheckCast * instruction)6713 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
6714 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6715 LocationSummary* locations = instruction->GetLocations();
6716 Location obj_loc = locations->InAt(0);
6717 Register obj = obj_loc.AsRegister<Register>();
6718 Location cls = locations->InAt(1);
6719 Location temp_loc = locations->GetTemp(0);
6720 Register temp = temp_loc.AsRegister<Register>();
6721 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
6722 DCHECK_GE(num_temps, 1u);
6723 DCHECK_LE(num_temps, 2u);
6724 Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
6725 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6726 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6727 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6728 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6729 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
6730 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
6731 const uint32_t object_array_data_offset =
6732 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
6733
6734 // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases
6735 // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding
6736 // read barriers is done for performance and code size reasons.
6737 bool is_type_check_slow_path_fatal =
6738 IsTypeCheckSlowPathFatal(type_check_kind, instruction->CanThrowIntoCatchBlock());
6739
6740 SlowPathCode* type_check_slow_path =
6741 new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
6742 is_type_check_slow_path_fatal);
6743 codegen_->AddSlowPath(type_check_slow_path);
6744
6745 NearLabel done;
6746 // Avoid null check if we know obj is not null.
6747 if (instruction->MustDoNullCheck()) {
6748 __ testl(obj, obj);
6749 __ j(kEqual, &done);
6750 }
6751
6752 switch (type_check_kind) {
6753 case TypeCheckKind::kExactCheck:
6754 case TypeCheckKind::kArrayCheck: {
6755 // /* HeapReference<Class> */ temp = obj->klass_
6756 GenerateReferenceLoadTwoRegisters(instruction,
6757 temp_loc,
6758 obj_loc,
6759 class_offset,
6760 kWithoutReadBarrier);
6761
6762 if (cls.IsRegister()) {
6763 __ cmpl(temp, cls.AsRegister<Register>());
6764 } else {
6765 DCHECK(cls.IsStackSlot()) << cls;
6766 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
6767 }
6768 // Jump to slow path for throwing the exception or doing a
6769 // more involved array check.
6770 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6771 break;
6772 }
6773
6774 case TypeCheckKind::kAbstractClassCheck: {
6775 // /* HeapReference<Class> */ temp = obj->klass_
6776 GenerateReferenceLoadTwoRegisters(instruction,
6777 temp_loc,
6778 obj_loc,
6779 class_offset,
6780 kWithoutReadBarrier);
6781
6782 // If the class is abstract, we eagerly fetch the super class of the
6783 // object to avoid doing a comparison we know will fail.
6784 NearLabel loop;
6785 __ Bind(&loop);
6786 // /* HeapReference<Class> */ temp = temp->super_class_
6787 GenerateReferenceLoadOneRegister(instruction,
6788 temp_loc,
6789 super_offset,
6790 maybe_temp2_loc,
6791 kWithoutReadBarrier);
6792
6793 // If the class reference currently in `temp` is null, jump to the slow path to throw the
6794 // exception.
6795 __ testl(temp, temp);
6796 __ j(kZero, type_check_slow_path->GetEntryLabel());
6797
6798 // Otherwise, compare the classes
6799 if (cls.IsRegister()) {
6800 __ cmpl(temp, cls.AsRegister<Register>());
6801 } else {
6802 DCHECK(cls.IsStackSlot()) << cls;
6803 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
6804 }
6805 __ j(kNotEqual, &loop);
6806 break;
6807 }
6808
6809 case TypeCheckKind::kClassHierarchyCheck: {
6810 // /* HeapReference<Class> */ temp = obj->klass_
6811 GenerateReferenceLoadTwoRegisters(instruction,
6812 temp_loc,
6813 obj_loc,
6814 class_offset,
6815 kWithoutReadBarrier);
6816
6817 // Walk over the class hierarchy to find a match.
6818 NearLabel loop;
6819 __ Bind(&loop);
6820 if (cls.IsRegister()) {
6821 __ cmpl(temp, cls.AsRegister<Register>());
6822 } else {
6823 DCHECK(cls.IsStackSlot()) << cls;
6824 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
6825 }
6826 __ j(kEqual, &done);
6827
6828 // /* HeapReference<Class> */ temp = temp->super_class_
6829 GenerateReferenceLoadOneRegister(instruction,
6830 temp_loc,
6831 super_offset,
6832 maybe_temp2_loc,
6833 kWithoutReadBarrier);
6834
6835 // If the class reference currently in `temp` is not null, jump
6836 // back at the beginning of the loop.
6837 __ testl(temp, temp);
6838 __ j(kNotZero, &loop);
6839      // Otherwise, jump to the slow path to throw the exception.
6840 __ jmp(type_check_slow_path->GetEntryLabel());
6841 break;
6842 }
6843
6844 case TypeCheckKind::kArrayObjectCheck: {
6845 // /* HeapReference<Class> */ temp = obj->klass_
6846 GenerateReferenceLoadTwoRegisters(instruction,
6847 temp_loc,
6848 obj_loc,
6849 class_offset,
6850 kWithoutReadBarrier);
6851
6852 // Do an exact check.
6853 if (cls.IsRegister()) {
6854 __ cmpl(temp, cls.AsRegister<Register>());
6855 } else {
6856 DCHECK(cls.IsStackSlot()) << cls;
6857 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
6858 }
6859 __ j(kEqual, &done);
6860
6861 // Otherwise, we need to check that the object's class is a non-primitive array.
6862 // /* HeapReference<Class> */ temp = temp->component_type_
6863 GenerateReferenceLoadOneRegister(instruction,
6864 temp_loc,
6865 component_offset,
6866 maybe_temp2_loc,
6867 kWithoutReadBarrier);
6868
6869       // If the component type is null (i.e. the object is not an array), jump to the slow path to
6870 // throw the exception. Otherwise proceed with the check.
6871 __ testl(temp, temp);
6872 __ j(kZero, type_check_slow_path->GetEntryLabel());
6873
6874 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
6875 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6876 break;
6877 }
6878
6879 case TypeCheckKind::kUnresolvedCheck:
6880 // We always go into the type check slow path for the unresolved check case.
6881 // We cannot directly call the CheckCast runtime entry point
6882 // without resorting to a type checking slow path here (i.e. by
6883       // calling InvokeRuntime directly), as it would require assigning
6884       // fixed registers for the inputs of this HCheckCast
6885       // instruction (following the runtime calling convention), which
6886 // might be cluttered by the potential first read barrier
6887 // emission at the beginning of this method.
6888 __ jmp(type_check_slow_path->GetEntryLabel());
6889 break;
6890
6891 case TypeCheckKind::kInterfaceCheck: {
6892 // Fast path for the interface check. Since we compare with a memory location in the inner
6893       // loop we would need to have cls poisoned. However, unpoisoning cls would reset the
6894 // conditional flags and cause the conditional jump to be incorrect. Therefore we just jump
6895 // to the slow path if we are running under poisoning.
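      // A rough pseudocode sketch of the fast path emitted below (field names are
      // illustrative):
      //
      //   int32_t i = obj->klass_->iftable_->length_;
      //   for (;;) {
      //     i -= 2;                               // Each interface occupies two slots.
      //     if (i < 0) goto slow_path;            // Not found (also covers an empty iftable).
      //     if (iftable->data_[i] == cls) break;  // Match: fall through to `done`.
      //   }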
6896 if (!kPoisonHeapReferences) {
6897 // Try to avoid read barriers to improve the fast path. We can not get false positives by
6898 // doing this.
6899 // /* HeapReference<Class> */ temp = obj->klass_
6900 GenerateReferenceLoadTwoRegisters(instruction,
6901 temp_loc,
6902 obj_loc,
6903 class_offset,
6904 kWithoutReadBarrier);
6905
6906 // /* HeapReference<Class> */ temp = temp->iftable_
6907 GenerateReferenceLoadTwoRegisters(instruction,
6908 temp_loc,
6909 temp_loc,
6910 iftable_offset,
6911 kWithoutReadBarrier);
6912 // Iftable is never null.
6913 __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
6914 // Loop through the iftable and check if any class matches.
6915 NearLabel start_loop;
6916 __ Bind(&start_loop);
6917 // Need to subtract first to handle the empty array case.
6918 __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2));
6919 __ j(kNegative, type_check_slow_path->GetEntryLabel());
6920 // Go to next interface if the classes do not match.
6921 __ cmpl(cls.AsRegister<Register>(),
6922 CodeGeneratorX86::ArrayAddress(temp,
6923 maybe_temp2_loc,
6924 TIMES_4,
6925 object_array_data_offset));
6926 __ j(kNotEqual, &start_loop);
6927 } else {
6928 __ jmp(type_check_slow_path->GetEntryLabel());
6929 }
6930 break;
6931 }
6932 }
6933 __ Bind(&done);
6934
6935 __ Bind(type_check_slow_path->GetExitLabel());
6936 }
6937
6938 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
6939 LocationSummary* locations =
6940 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
6941 InvokeRuntimeCallingConvention calling_convention;
6942 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6943 }
6944
6945 void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instruction) {
6946 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject
6947 : kQuickUnlockObject,
6948 instruction,
6949 instruction->GetDexPc());
6950 if (instruction->IsEnter()) {
6951 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
6952 } else {
6953 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
6954 }
6955 }
6956
6957 void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
6958 void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
6959 void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
6960
6961 void LocationsBuilderX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
6962 LocationSummary* locations =
6963 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
6964 DCHECK(instruction->GetResultType() == Primitive::kPrimInt
6965 || instruction->GetResultType() == Primitive::kPrimLong);
6966 locations->SetInAt(0, Location::RequiresRegister());
6967 locations->SetInAt(1, Location::Any());
6968 locations->SetOut(Location::SameAsFirstInput());
6969 }
6970
6971 void InstructionCodeGeneratorX86::VisitAnd(HAnd* instruction) {
6972 HandleBitwiseOperation(instruction);
6973 }
6974
6975 void InstructionCodeGeneratorX86::VisitOr(HOr* instruction) {
6976 HandleBitwiseOperation(instruction);
6977 }
6978
6979 void InstructionCodeGeneratorX86::VisitXor(HXor* instruction) {
6980 HandleBitwiseOperation(instruction);
6981 }
6982
6983 void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
6984 LocationSummary* locations = instruction->GetLocations();
6985 Location first = locations->InAt(0);
6986 Location second = locations->InAt(1);
6987 DCHECK(first.Equals(locations->Out()));
6988
6989 if (instruction->GetResultType() == Primitive::kPrimInt) {
6990 if (second.IsRegister()) {
6991 if (instruction->IsAnd()) {
6992 __ andl(first.AsRegister<Register>(), second.AsRegister<Register>());
6993 } else if (instruction->IsOr()) {
6994 __ orl(first.AsRegister<Register>(), second.AsRegister<Register>());
6995 } else {
6996 DCHECK(instruction->IsXor());
6997 __ xorl(first.AsRegister<Register>(), second.AsRegister<Register>());
6998 }
6999 } else if (second.IsConstant()) {
7000 if (instruction->IsAnd()) {
7001 __ andl(first.AsRegister<Register>(),
7002 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
7003 } else if (instruction->IsOr()) {
7004 __ orl(first.AsRegister<Register>(),
7005 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
7006 } else {
7007 DCHECK(instruction->IsXor());
7008 __ xorl(first.AsRegister<Register>(),
7009 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
7010 }
7011 } else {
7012 if (instruction->IsAnd()) {
7013 __ andl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
7014 } else if (instruction->IsOr()) {
7015 __ orl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
7016 } else {
7017 DCHECK(instruction->IsXor());
7018 __ xorl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
7019 }
7020 }
7021 } else {
7022 DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
7023 if (second.IsRegisterPair()) {
7024 if (instruction->IsAnd()) {
7025 __ andl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
7026 __ andl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
7027 } else if (instruction->IsOr()) {
7028 __ orl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
7029 __ orl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
7030 } else {
7031 DCHECK(instruction->IsXor());
7032 __ xorl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
7033 __ xorl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
7034 }
7035 } else if (second.IsDoubleStackSlot()) {
7036 if (instruction->IsAnd()) {
7037 __ andl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
7038 __ andl(first.AsRegisterPairHigh<Register>(),
7039 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
7040 } else if (instruction->IsOr()) {
7041 __ orl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
7042 __ orl(first.AsRegisterPairHigh<Register>(),
7043 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
7044 } else {
7045 DCHECK(instruction->IsXor());
7046 __ xorl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
7047 __ xorl(first.AsRegisterPairHigh<Register>(),
7048 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
7049 }
7050 } else {
7051 DCHECK(second.IsConstant()) << second;
7052 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
7053 int32_t low_value = Low32Bits(value);
7054 int32_t high_value = High32Bits(value);
7055 Immediate low(low_value);
7056 Immediate high(high_value);
7057 Register first_low = first.AsRegisterPairLow<Register>();
7058 Register first_high = first.AsRegisterPairHigh<Register>();
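      // For example (illustrative), and'ing with 0x00000000FFFFFFFF emits only
      // `xorl first_high, first_high`: the low half needs no instruction since its
      // mask is all ones, and the high half is simply cleared.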
7059 if (instruction->IsAnd()) {
7060 if (low_value == 0) {
7061 __ xorl(first_low, first_low);
7062 } else if (low_value != -1) {
7063 __ andl(first_low, low);
7064 }
7065 if (high_value == 0) {
7066 __ xorl(first_high, first_high);
7067 } else if (high_value != -1) {
7068 __ andl(first_high, high);
7069 }
7070 } else if (instruction->IsOr()) {
7071 if (low_value != 0) {
7072 __ orl(first_low, low);
7073 }
7074 if (high_value != 0) {
7075 __ orl(first_high, high);
7076 }
7077 } else {
7078 DCHECK(instruction->IsXor());
7079 if (low_value != 0) {
7080 __ xorl(first_low, low);
7081 }
7082 if (high_value != 0) {
7083 __ xorl(first_high, high);
7084 }
7085 }
7086 }
7087 }
7088 }
7089
7090 void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
7091 HInstruction* instruction,
7092 Location out,
7093 uint32_t offset,
7094 Location maybe_temp,
7095 ReadBarrierOption read_barrier_option) {
7096 Register out_reg = out.AsRegister<Register>();
7097 if (read_barrier_option == kWithReadBarrier) {
7098 CHECK(kEmitCompilerReadBarrier);
7099 if (kUseBakerReadBarrier) {
7100 // Load with fast path based Baker's read barrier.
7101 // /* HeapReference<Object> */ out = *(out + offset)
7102 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7103 instruction, out, out_reg, offset, /* needs_null_check */ false);
7104 } else {
7105 // Load with slow path based read barrier.
7106 // Save the value of `out` into `maybe_temp` before overwriting it
7107 // in the following move operation, as we will need it for the
7108 // read barrier below.
7109 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
7110 __ movl(maybe_temp.AsRegister<Register>(), out_reg);
7111 // /* HeapReference<Object> */ out = *(out + offset)
7112 __ movl(out_reg, Address(out_reg, offset));
7113 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
7114 }
7115 } else {
7116 // Plain load with no read barrier.
7117 // /* HeapReference<Object> */ out = *(out + offset)
7118 __ movl(out_reg, Address(out_reg, offset));
7119 __ MaybeUnpoisonHeapReference(out_reg);
7120 }
7121 }
7122
7123 void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
7124 HInstruction* instruction,
7125 Location out,
7126 Location obj,
7127 uint32_t offset,
7128 ReadBarrierOption read_barrier_option) {
7129 Register out_reg = out.AsRegister<Register>();
7130 Register obj_reg = obj.AsRegister<Register>();
7131 if (read_barrier_option == kWithReadBarrier) {
7132 CHECK(kEmitCompilerReadBarrier);
7133 if (kUseBakerReadBarrier) {
7134 // Load with fast path based Baker's read barrier.
7135 // /* HeapReference<Object> */ out = *(obj + offset)
7136 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7137 instruction, out, obj_reg, offset, /* needs_null_check */ false);
7138 } else {
7139 // Load with slow path based read barrier.
7140 // /* HeapReference<Object> */ out = *(obj + offset)
7141 __ movl(out_reg, Address(obj_reg, offset));
7142 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7143 }
7144 } else {
7145 // Plain load with no read barrier.
7146 // /* HeapReference<Object> */ out = *(obj + offset)
7147 __ movl(out_reg, Address(obj_reg, offset));
7148 __ MaybeUnpoisonHeapReference(out_reg);
7149 }
7150 }
7151
7152 void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
7153 HInstruction* instruction,
7154 Location root,
7155 const Address& address,
7156 Label* fixup_label,
7157 ReadBarrierOption read_barrier_option) {
7158 Register root_reg = root.AsRegister<Register>();
7159 if (read_barrier_option == kWithReadBarrier) {
7160 DCHECK(kEmitCompilerReadBarrier);
7161 if (kUseBakerReadBarrier) {
7162 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
7163       // Baker's read barriers are used:
7164 //
7165 // root = obj.field;
7166 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
7167 // if (temp != null) {
7168 // root = temp(root)
7169 // }
7170
7171 // /* GcRoot<mirror::Object> */ root = *address
7172 __ movl(root_reg, address);
7173 if (fixup_label != nullptr) {
7174 __ Bind(fixup_label);
7175 }
7176 static_assert(
7177 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
7178 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
7179 "have different sizes.");
7180 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
7181 "art::mirror::CompressedReference<mirror::Object> and int32_t "
7182 "have different sizes.");
7183
7184 // Slow path marking the GC root `root`.
7185 SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
7186 instruction, root, /* unpoison_ref_before_marking */ false);
7187 codegen_->AddSlowPath(slow_path);
7188
7189 // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
7190 const int32_t entry_point_offset =
7191 CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
7192 __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
7193 // The entrypoint is null when the GC is not marking.
7194 __ j(kNotEqual, slow_path->GetEntryLabel());
7195 __ Bind(slow_path->GetExitLabel());
7196 } else {
7197 // GC root loaded through a slow path for read barriers other
7198 // than Baker's.
7199 // /* GcRoot<mirror::Object>* */ root = address
7200 __ leal(root_reg, address);
7201 if (fixup_label != nullptr) {
7202 __ Bind(fixup_label);
7203 }
7204 // /* mirror::Object* */ root = root->Read()
7205 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
7206 }
7207 } else {
7208 // Plain GC root load with no read barrier.
7209 // /* GcRoot<mirror::Object> */ root = *address
7210 __ movl(root_reg, address);
7211 if (fixup_label != nullptr) {
7212 __ Bind(fixup_label);
7213 }
7214 // Note that GC roots are not affected by heap poisoning, thus we
7215 // do not have to unpoison `root_reg` here.
7216 }
7217 }
7218
7219 void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
7220 Location ref,
7221 Register obj,
7222 uint32_t offset,
7223 bool needs_null_check) {
7224 DCHECK(kEmitCompilerReadBarrier);
7225 DCHECK(kUseBakerReadBarrier);
7226
7227 // /* HeapReference<Object> */ ref = *(obj + offset)
7228 Address src(obj, offset);
7229 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7230 }
7231
7232 void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
7233 Location ref,
7234 Register obj,
7235 uint32_t data_offset,
7236 Location index,
7237 bool needs_null_check) {
7238 DCHECK(kEmitCompilerReadBarrier);
7239 DCHECK(kUseBakerReadBarrier);
7240
7241 static_assert(
7242 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
7243 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
7244 // /* HeapReference<Object> */ ref =
7245 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
7246 Address src = CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset);
7247 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7248 }
7249
7250 void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
7251 Location ref,
7252 Register obj,
7253 const Address& src,
7254 bool needs_null_check,
7255 bool always_update_field,
7256 Register* temp) {
7257 DCHECK(kEmitCompilerReadBarrier);
7258 DCHECK(kUseBakerReadBarrier);
7259
7260 // In slow path based read barriers, the read barrier call is
7261 // inserted after the original load. However, in fast path based
7262 // Baker's read barriers, we need to perform the load of
7263 // mirror::Object::monitor_ *before* the original reference load.
7264 // This load-load ordering is required by the read barrier.
7265 // The fast path/slow path (for Baker's algorithm) should look like:
7266 //
7267 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
7268 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
7269 // HeapReference<Object> ref = *src; // Original reference load.
7270 // bool is_gray = (rb_state == ReadBarrier::GrayState());
7271 // if (is_gray) {
7272 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
7273 // }
7274 //
7275 // Note: the original implementation in ReadBarrier::Barrier is
7276 // slightly more complex as:
7277 // - it implements the load-load fence using a data dependency on
7278 // the high-bits of rb_state, which are expected to be all zeroes
7279 // (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
7280 // which is a no-op thanks to the x86 memory model);
7281 // - it performs additional checks that we do not do here for
7282 // performance reasons.
7283
7284 Register ref_reg = ref.AsRegister<Register>();
7285 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
7286
7287 // Given the numeric representation, it's enough to check the low bit of the rb_state.
7288 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
7289 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
7290 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
7291 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
7292 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
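  // For instance, if kReadBarrierStateShift were 28 (an illustrative value, not necessarily
  // the actual one), the testb below would read byte 3 of the lock word and test bit 4 of
  // that byte, i.e. bit 28 of obj->monitor_.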
7293
7294 // if (rb_state == ReadBarrier::GrayState())
7295 // ref = ReadBarrier::Mark(ref);
7296 // At this point, just do the "if" and make sure that flags are preserved until the branch.
7297 __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
7298 if (needs_null_check) {
7299 MaybeRecordImplicitNullCheck(instruction);
7300 }
7301
7302 // Load fence to prevent load-load reordering.
7303 // Note that this is a no-op, thanks to the x86 memory model.
7304 GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
7305
7306 // The actual reference load.
7307 // /* HeapReference<Object> */ ref = *src
7308 __ movl(ref_reg, src); // Flags are unaffected.
7309
7310 // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
7311 // Slow path marking the object `ref` when it is gray.
7312 SlowPathCode* slow_path;
7313 if (always_update_field) {
7314 DCHECK(temp != nullptr);
7315 slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86(
7316 instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp);
7317 } else {
7318 slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
7319 instruction, ref, /* unpoison_ref_before_marking */ true);
7320 }
7321 AddSlowPath(slow_path);
7322
7323 // We have done the "if" of the gray bit check above, now branch based on the flags.
7324 __ j(kNotZero, slow_path->GetEntryLabel());
7325
7326 // Object* ref = ref_addr->AsMirrorPtr()
7327 __ MaybeUnpoisonHeapReference(ref_reg);
7328
7329 __ Bind(slow_path->GetExitLabel());
7330 }
7331
7332 void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
7333 Location out,
7334 Location ref,
7335 Location obj,
7336 uint32_t offset,
7337 Location index) {
7338 DCHECK(kEmitCompilerReadBarrier);
7339
7340 // Insert a slow path based read barrier *after* the reference load.
7341 //
7342 // If heap poisoning is enabled, the unpoisoning of the loaded
7343 // reference will be carried out by the runtime within the slow
7344 // path.
7345 //
7346 // Note that `ref` currently does not get unpoisoned (when heap
7347 // poisoning is enabled), which is alright as the `ref` argument is
7348 // not used by the artReadBarrierSlow entry point.
7349 //
7350 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
7351 SlowPathCode* slow_path = new (GetGraph()->GetArena())
7352 ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
7353 AddSlowPath(slow_path);
7354
7355 __ jmp(slow_path->GetEntryLabel());
7356 __ Bind(slow_path->GetExitLabel());
7357 }
7358
7359 void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
7360 Location out,
7361 Location ref,
7362 Location obj,
7363 uint32_t offset,
7364 Location index) {
7365 if (kEmitCompilerReadBarrier) {
7366 // Baker's read barriers shall be handled by the fast path
7367 // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
7368 DCHECK(!kUseBakerReadBarrier);
7369 // If heap poisoning is enabled, unpoisoning will be taken care of
7370 // by the runtime within the slow path.
7371 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
7372 } else if (kPoisonHeapReferences) {
7373 __ UnpoisonHeapReference(out.AsRegister<Register>());
7374 }
7375 }
7376
7377 void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
7378 Location out,
7379 Location root) {
7380 DCHECK(kEmitCompilerReadBarrier);
7381
7382 // Insert a slow path based read barrier *after* the GC root load.
7383 //
7384 // Note that GC roots are not affected by heap poisoning, so we do
7385 // not need to do anything special for this here.
7386 SlowPathCode* slow_path =
7387 new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86(instruction, out, root);
7388 AddSlowPath(slow_path);
7389
7390 __ jmp(slow_path->GetEntryLabel());
7391 __ Bind(slow_path->GetExitLabel());
7392 }
7393
7394 void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7395 // Nothing to do, this should be removed during prepare for register allocator.
7396 LOG(FATAL) << "Unreachable";
7397 }
7398
7399 void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7400 // Nothing to do, this should be removed during prepare for register allocator.
7401 LOG(FATAL) << "Unreachable";
7402 }
7403
7404 // Simple implementation of packed switch - generate cascaded compare/jumps.
7405 void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
7406 LocationSummary* locations =
7407 new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
7408 locations->SetInAt(0, Location::RequiresRegister());
7409 }
7410
7411 void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
7412 int32_t lower_bound,
7413 uint32_t num_entries,
7414 HBasicBlock* switch_block,
7415 HBasicBlock* default_block) {
7416 // Figure out the correct compare values and jump conditions.
7417 // Handle the first compare/branch as a special case because it might
7418 // jump to the default case.
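  // For example, with lower_bound == 1 and num_entries == 4 the emitted cascade is roughly
  // (illustrative only):
  //
  //   cmpl value, 1 ; jl <default>    ; je <case for 1>
  //   cmpl value, 3 ; jl <case for 2> ; je <case for 3>
  //   cmpl value, 4 ; je <case for 4>
  //   jmp <default>   // Omitted when the default block is the next block.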
7419 DCHECK_GT(num_entries, 2u);
7420 Condition first_condition;
7421 uint32_t index;
7422 const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
7423 if (lower_bound != 0) {
7424 first_condition = kLess;
7425 __ cmpl(value_reg, Immediate(lower_bound));
7426 __ j(first_condition, codegen_->GetLabelOf(default_block));
7427 __ j(kEqual, codegen_->GetLabelOf(successors[0]));
7428
7429 index = 1;
7430 } else {
7431 // Handle all the compare/jumps below.
7432 first_condition = kBelow;
7433 index = 0;
7434 }
7435
7436 // Handle the rest of the compare/jumps.
7437 for (; index + 1 < num_entries; index += 2) {
7438 int32_t compare_to_value = lower_bound + index + 1;
7439 __ cmpl(value_reg, Immediate(compare_to_value));
7440 // Jump to successors[index] if value < case_value[index].
7441 __ j(first_condition, codegen_->GetLabelOf(successors[index]));
7442 // Jump to successors[index + 1] if value == case_value[index + 1].
7443 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
7444 }
7445
7446 if (index != num_entries) {
7447     // There is an odd number of entries. Handle the last one.
7448 DCHECK_EQ(index + 1, num_entries);
7449 __ cmpl(value_reg, Immediate(lower_bound + index));
7450 __ j(kEqual, codegen_->GetLabelOf(successors[index]));
7451 }
7452
7453 // And the default for any other value.
7454 if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
7455 __ jmp(codegen_->GetLabelOf(default_block));
7456 }
7457 }
7458
7459 void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
7460 int32_t lower_bound = switch_instr->GetStartValue();
7461 uint32_t num_entries = switch_instr->GetNumEntries();
7462 LocationSummary* locations = switch_instr->GetLocations();
7463 Register value_reg = locations->InAt(0).AsRegister<Register>();
7464
7465 GenPackedSwitchWithCompares(value_reg,
7466 lower_bound,
7467 num_entries,
7468 switch_instr->GetBlock(),
7469 switch_instr->GetDefaultBlock());
7470 }
7471
7472 void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
7473 LocationSummary* locations =
7474 new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
7475 locations->SetInAt(0, Location::RequiresRegister());
7476
7477 // Constant area pointer.
7478 locations->SetInAt(1, Location::RequiresRegister());
7479
7480 // And the temporary we need.
7481 locations->AddTemp(Location::RequiresRegister());
7482 }
7483
7484 void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
7485 int32_t lower_bound = switch_instr->GetStartValue();
7486 uint32_t num_entries = switch_instr->GetNumEntries();
7487 LocationSummary* locations = switch_instr->GetLocations();
7488 Register value_reg = locations->InAt(0).AsRegister<Register>();
7489 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
7490
7491 if (num_entries <= kPackedSwitchJumpTableThreshold) {
7492 GenPackedSwitchWithCompares(value_reg,
7493 lower_bound,
7494 num_entries,
7495 switch_instr->GetBlock(),
7496 default_block);
7497 return;
7498 }
7499
7500   // Optimizing has a jump area: dispatch through a jump table stored in the constant area.
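  // The emitted sequence is roughly (illustrative only):
  //
  //   leal temp, [value - lower_bound]                  // Only if lower_bound != 0.
  //   cmpl temp, num_entries - 1
  //   ja   <default>
  //   movl temp, [constant_area + <table> + temp * 4]   // Entry holds (target - base address).
  //   addl temp, constant_area
  //   jmp  temp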
7501 Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
7502 Register constant_area = locations->InAt(1).AsRegister<Register>();
7503
7504 // Remove the bias, if needed.
7505 if (lower_bound != 0) {
7506 __ leal(temp_reg, Address(value_reg, -lower_bound));
7507 value_reg = temp_reg;
7508 }
7509
7510 // Is the value in range?
7511 DCHECK_GE(num_entries, 1u);
7512 __ cmpl(value_reg, Immediate(num_entries - 1));
7513 __ j(kAbove, codegen_->GetLabelOf(default_block));
7514
7515 // We are in the range of the table.
7516 // Load (target-constant_area) from the jump table, indexing by the value.
7517 __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg));
7518
7519 // Compute the actual target address by adding in constant_area.
7520 __ addl(temp_reg, constant_area);
7521
7522 // And jump.
7523 __ jmp(temp_reg);
7524 }
7525
7526 void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress(
7527 HX86ComputeBaseMethodAddress* insn) {
7528 LocationSummary* locations =
7529 new (GetGraph()->GetArena()) LocationSummary(insn, LocationSummary::kNoCall);
7530 locations->SetOut(Location::RequiresRegister());
7531 }
7532
7533 void InstructionCodeGeneratorX86::VisitX86ComputeBaseMethodAddress(
7534 HX86ComputeBaseMethodAddress* insn) {
7535 LocationSummary* locations = insn->GetLocations();
7536 Register reg = locations->Out().AsRegister<Register>();
7537
7538 // Generate call to next instruction.
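  // The call/pop pair below materializes the current code address into `reg` (illustrative):
  //
  //   call next_instruction
  // next_instruction:
  //   popl reg   // `reg` now holds the address of next_instruction.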
7539 Label next_instruction;
7540 __ call(&next_instruction);
7541 __ Bind(&next_instruction);
7542
7543 // Remember this offset for later use with constant area.
7544 codegen_->AddMethodAddressOffset(insn, GetAssembler()->CodeSize());
7545
7546 // Grab the return address off the stack.
7547 __ popl(reg);
7548 }
7549
7550 void LocationsBuilderX86::VisitX86LoadFromConstantTable(
7551 HX86LoadFromConstantTable* insn) {
7552 LocationSummary* locations =
7553 new (GetGraph()->GetArena()) LocationSummary(insn, LocationSummary::kNoCall);
7554
7555 locations->SetInAt(0, Location::RequiresRegister());
7556 locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant()));
7557
7558   // If the constant does not need to be materialized, we only need the inputs to be set.
7559 if (insn->IsEmittedAtUseSite()) {
7560 return;
7561 }
7562
7563 switch (insn->GetType()) {
7564 case Primitive::kPrimFloat:
7565 case Primitive::kPrimDouble:
7566 locations->SetOut(Location::RequiresFpuRegister());
7567 break;
7568
7569 case Primitive::kPrimInt:
7570 locations->SetOut(Location::RequiresRegister());
7571 break;
7572
7573 default:
7574 LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
7575 }
7576 }
7577
7578 void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromConstantTable* insn) {
7579 if (insn->IsEmittedAtUseSite()) {
7580 return;
7581 }
7582
7583 LocationSummary* locations = insn->GetLocations();
7584 Location out = locations->Out();
7585 Register const_area = locations->InAt(0).AsRegister<Register>();
7586 HConstant *value = insn->GetConstant();
7587
7588 switch (insn->GetType()) {
7589 case Primitive::kPrimFloat:
7590 __ movss(out.AsFpuRegister<XmmRegister>(),
7591 codegen_->LiteralFloatAddress(
7592 value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
7593 break;
7594
7595 case Primitive::kPrimDouble:
7596 __ movsd(out.AsFpuRegister<XmmRegister>(),
7597 codegen_->LiteralDoubleAddress(
7598 value->AsDoubleConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
7599 break;
7600
7601 case Primitive::kPrimInt:
7602 __ movl(out.AsRegister<Register>(),
7603 codegen_->LiteralInt32Address(
7604 value->AsIntConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
7605 break;
7606
7607 default:
7608 LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
7609 }
7610 }
7611
7612 /**
7613 * Class to handle late fixup of offsets into constant area.
7614 */
7615 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
7616 public:
7617   RIPFixup(CodeGeneratorX86& codegen,
7618 HX86ComputeBaseMethodAddress* base_method_address,
7619 size_t offset)
7620 : codegen_(&codegen),
7621 base_method_address_(base_method_address),
7622 offset_into_constant_area_(offset) {}
7623
7624 protected:
7625   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
7626
7627 CodeGeneratorX86* codegen_;
7628 HX86ComputeBaseMethodAddress* base_method_address_;
7629
7630 private:
7631   void Process(const MemoryRegion& region, int pos) OVERRIDE {
7632 // Patch the correct offset for the instruction. The place to patch is the
7633 // last 4 bytes of the instruction.
7634     // The value to patch is the distance of the target offset in the constant area
7635     // from the address computed by the HX86ComputeBaseMethodAddress instruction.
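    // For example (illustrative numbers only): if the constant area starts at code offset
    // 0x400, this literal lives 8 bytes into it, and the base address was materialized at
    // code offset 0x20, the patched displacement is 0x400 + 0x8 - 0x20 = 0x3e8.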
7636 int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
7637 int32_t relative_position =
7638 constant_offset - codegen_->GetMethodAddressOffset(base_method_address_);
7639
7640 // Patch in the right value.
7641 region.StoreUnaligned<int32_t>(pos - 4, relative_position);
7642 }
7643
7644 // Location in constant area that the fixup refers to.
7645 int32_t offset_into_constant_area_;
7646 };
7647
7648 /**
7649 * Class to handle late fixup of offsets to a jump table that will be created in the
7650 * constant area.
7651 */
7652 class JumpTableRIPFixup : public RIPFixup {
7653 public:
7654   JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr)
7655 : RIPFixup(codegen, switch_instr->GetBaseMethodAddress(), static_cast<size_t>(-1)),
7656 switch_instr_(switch_instr) {}
7657
7658   void CreateJumpTable() {
7659 X86Assembler* assembler = codegen_->GetAssembler();
7660
7661 // Ensure that the reference to the jump table has the correct offset.
7662 const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
7663 SetOffset(offset_in_constant_table);
7664
7665 // The label values in the jump table are computed relative to the
7666 // instruction addressing the constant area.
7667 const int32_t relative_offset = codegen_->GetMethodAddressOffset(base_method_address_);
7668
7669     // Populate the jump table with the correct offset for each switch target.
7670 int32_t num_entries = switch_instr_->GetNumEntries();
7671 HBasicBlock* block = switch_instr_->GetBlock();
7672 const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
7673     // The value that we want is the target offset - the offset of the base method address.
7674 for (int32_t i = 0; i < num_entries; i++) {
7675 HBasicBlock* b = successors[i];
7676 Label* l = codegen_->GetLabelOf(b);
7677 DCHECK(l->IsBound());
7678 int32_t offset_to_block = l->Position() - relative_offset;
7679 assembler->AppendInt32(offset_to_block);
7680 }
7681 }
7682
7683 private:
7684 const HX86PackedSwitch* switch_instr_;
7685 };
7686
7687 void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
7688 // Generate the constant area if needed.
7689 X86Assembler* assembler = GetAssembler();
7690 if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
7691 // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
7692 // byte values.
7693 assembler->Align(4, 0);
7694 constant_area_start_ = assembler->CodeSize();
7695
7696 // Populate any jump tables.
7697 for (auto jump_table : fixups_to_jump_tables_) {
7698 jump_table->CreateJumpTable();
7699 }
7700
7701 // And now add the constant area to the generated code.
7702 assembler->AddConstantArea();
7703 }
7704
7705 // And finish up.
7706 CodeGenerator::Finalize(allocator);
7707 }
7708
7709 Address CodeGeneratorX86::LiteralDoubleAddress(double v,
7710 HX86ComputeBaseMethodAddress* method_base,
7711 Register reg) {
7712 AssemblerFixup* fixup =
7713 new (GetGraph()->GetArena()) RIPFixup(*this, method_base, __ AddDouble(v));
7714 return Address(reg, kDummy32BitOffset, fixup);
7715 }
7716
7717 Address CodeGeneratorX86::LiteralFloatAddress(float v,
7718 HX86ComputeBaseMethodAddress* method_base,
7719 Register reg) {
7720 AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, method_base, __ AddFloat(v));
7721 return Address(reg, kDummy32BitOffset, fixup);
7722 }
7723
7724 Address CodeGeneratorX86::LiteralInt32Address(int32_t v,
7725 HX86ComputeBaseMethodAddress* method_base,
7726 Register reg) {
7727 AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, method_base, __ AddInt32(v));
7728 return Address(reg, kDummy32BitOffset, fixup);
7729 }
7730
7731 Address CodeGeneratorX86::LiteralInt64Address(int64_t v,
7732 HX86ComputeBaseMethodAddress* method_base,
7733 Register reg) {
7734 AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, method_base, __ AddInt64(v));
7735 return Address(reg, kDummy32BitOffset, fixup);
7736 }
7737
7738 void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) {
7739 if (value == 0) {
7740 __ xorl(dest, dest);
7741 } else {
7742 __ movl(dest, Immediate(value));
7743 }
7744 }
7745
7746 void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
7747 if (value == 0) {
7748 __ testl(dest, dest);
7749 } else {
7750 __ cmpl(dest, Immediate(value));
7751 }
7752 }
7753
7754 void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
7755 Register lhs_reg = lhs.AsRegister<Register>();
7756 GenerateIntCompare(lhs_reg, rhs);
7757 }
7758
7759 void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) {
7760 if (rhs.IsConstant()) {
7761 int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
7762 Compare32BitValue(lhs, value);
7763 } else if (rhs.IsStackSlot()) {
7764 __ cmpl(lhs, Address(ESP, rhs.GetStackIndex()));
7765 } else {
7766 __ cmpl(lhs, rhs.AsRegister<Register>());
7767 }
7768 }
7769
7770 Address CodeGeneratorX86::ArrayAddress(Register obj,
7771 Location index,
7772 ScaleFactor scale,
7773 uint32_t data_offset) {
7774 return index.IsConstant() ?
7775 Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
7776 Address(obj, index.AsRegister<Register>(), scale, data_offset);
7777 }
7778
7779 Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
7780 Register reg,
7781 Register value) {
7782 // Create a fixup to be used to create and address the jump table.
7783 JumpTableRIPFixup* table_fixup =
7784 new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr);
7785
7786 // We have to populate the jump tables.
7787 fixups_to_jump_tables_.push_back(table_fixup);
7788
7789 // We want a scaled address, as we are extracting the correct offset from the table.
7790 return Address(reg, value, TIMES_4, kDummy32BitOffset, table_fixup);
7791 }
7792
7793 // TODO: target as memory.
7794 void CodeGeneratorX86::MoveFromReturnRegister(Location target, Primitive::Type type) {
7795 if (!target.IsValid()) {
7796 DCHECK_EQ(type, Primitive::kPrimVoid);
7797 return;
7798 }
7799
7800 DCHECK_NE(type, Primitive::kPrimVoid);
7801
7802 Location return_loc = InvokeDexCallingConventionVisitorX86().GetReturnLocation(type);
7803 if (target.Equals(return_loc)) {
7804 return;
7805 }
7806
7807 // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
7808 // with the else branch.
7809 if (type == Primitive::kPrimLong) {
7810 HParallelMove parallel_move(GetGraph()->GetArena());
7811 parallel_move.AddMove(return_loc.ToLow(), target.ToLow(), Primitive::kPrimInt, nullptr);
7812 parallel_move.AddMove(return_loc.ToHigh(), target.ToHigh(), Primitive::kPrimInt, nullptr);
7813     GetMoveResolver()->EmitNativeCode(&parallel_move);
7814 } else {
7815 // Let the parallel move resolver take care of all of this.
7816 HParallelMove parallel_move(GetGraph()->GetArena());
7817 parallel_move.AddMove(return_loc, target, type, nullptr);
7818     GetMoveResolver()->EmitNativeCode(&parallel_move);
7819 }
7820 }
7821
7822 void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
7823 const uint8_t* roots_data,
7824 const PatchInfo<Label>& info,
7825 uint64_t index_in_table) const {
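  // The label was bound just after the 32-bit root literal was emitted; backing up by the
  // adjustment below is assumed to land on the first byte of that literal, which is then
  // overwritten with the address of the root's entry in `roots_data`.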
7826 uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
7827 uintptr_t address =
7828 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
7829 typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;
7830 reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
7831 dchecked_integral_cast<uint32_t>(address);
7832 }
7833
7834 void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
7835 for (const PatchInfo<Label>& info : jit_string_patches_) {
7836 const auto& it = jit_string_roots_.find(
7837 StringReference(&info.dex_file, dex::StringIndex(info.index)));
7838 DCHECK(it != jit_string_roots_.end());
7839 PatchJitRootUse(code, roots_data, info, it->second);
7840 }
7841
7842 for (const PatchInfo<Label>& info : jit_class_patches_) {
7843 const auto& it = jit_class_roots_.find(
7844 TypeReference(&info.dex_file, dex::TypeIndex(info.index)));
7845 DCHECK(it != jit_class_roots_.end());
7846 PatchJitRootUse(code, roots_data, info, it->second);
7847 }
7848 }
7849
7850 #undef __
7851
7852 } // namespace x86
7853 } // namespace art
7854