/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_x86.h"

#include "arch/x86/jni_frame_x86.h"
#include "art_method-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "interpreter/mterp/nterp.h"
#include "intrinsics.h"
#include "intrinsics_x86.h"
#include "jit/profiling_info.h"
#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "mirror/var_handle.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
#include "utils/x86/assembler_x86.h"
#include "utils/x86/managed_register_x86.h"

namespace art {

template<class MirrorType>
class GcRoot;

namespace x86 {

static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = EAX;
static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };

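// The C2 flag (bit 10) of the x87 FPU status word; fprem sets it while its
// partial-remainder reduction is still incomplete.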
static constexpr int kC2ConditionMask = 0x400;

static constexpr int kFakeReturnRegister = Register(8);

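// Canonical quiet-NaN bit patterns for double and float.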
static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);

static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
  InvokeRuntimeCallingConvention calling_convention;
  RegisterSet caller_saves = RegisterSet::Empty();
  caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
  // that the kPrimNot result register is the same as the first argument register.
  return caller_saves;
}

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()

class NullCheckSlowPathX86 : public SlowPathCode {
 public:
  explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    x86_codegen->InvokeRuntime(kQuickThrowNullPointer,
                               instruction_,
                               instruction_->GetDexPc(),
                               this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "NullCheckSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
};

class DivZeroCheckSlowPathX86 : public SlowPathCode {
 public:
  explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "DivZeroCheckSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
};

121 class DivRemMinusOneSlowPathX86 : public SlowPathCode {
122 public:
DivRemMinusOneSlowPathX86(HInstruction * instruction,Register reg,bool is_div)123 DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div)
124 : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {}
125
EmitNativeCode(CodeGenerator * codegen)126 void EmitNativeCode(CodeGenerator* codegen) override {
127 __ Bind(GetEntryLabel());
128 if (is_div_) {
129 __ negl(reg_);
130 } else {
131 __ movl(reg_, Immediate(0));
132 }
133 __ jmp(GetExitLabel());
134 }
135
GetDescription() const136 const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86"; }
137
138 private:
139 Register reg_;
140 bool is_div_;
141 DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
142 };
143
class BoundsCheckSlowPathX86 : public SlowPathCode {
 public:
  explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }

    // Are we using an array length from memory?
    HInstruction* array_length = instruction_->InputAt(1);
    Location length_loc = locations->InAt(1);
    InvokeRuntimeCallingConvention calling_convention;
    if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
      // Load the array length into our temporary.
      HArrayLength* length = array_length->AsArrayLength();
      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
      Location array_loc = array_length->GetLocations()->InAt(0);
      Address array_len(array_loc.AsRegister<Register>(), len_offset);
      length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
      // Check for conflicts with index.
      if (length_loc.Equals(locations->InAt(0))) {
        // We know we aren't using parameter 2.
        length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
      }
      __ movl(length_loc.AsRegister<Register>(), array_len);
      if (mirror::kUseStringCompression && length->IsStringLength()) {
        __ shrl(length_loc.AsRegister<Register>(), Immediate(1));
      }
    }
    x86_codegen->EmitParallelMoves(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        DataType::Type::kInt32,
        length_loc,
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32);
    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
        ? kQuickThrowStringBounds
        : kQuickThrowArrayBounds;
    x86_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "BoundsCheckSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
};

class SuspendCheckSlowPathX86 : public SlowPathCode {
 public:
  SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCode(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
    x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
    if (successor_ == nullptr) {
      __ jmp(GetReturnLabel());
    } else {
      __ jmp(x86_codegen->GetLabelOf(successor_));
    }
  }

  Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const override { return "SuspendCheckSlowPathX86"; }

 private:
  HBasicBlock* const successor_;
  Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86);
};

class LoadStringSlowPathX86 : public SlowPathCode {
 public:
  explicit LoadStringSlowPathX86(HLoadString* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
    __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index.index_));
    x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
    RestoreLiveRegisters(codegen, locations);

    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadStringSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86);
};

class LoadClassSlowPathX86 : public SlowPathCode {
 public:
  LoadClassSlowPathX86(HLoadClass* cls, HInstruction* at)
      : SlowPathCode(at), cls_(cls) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    Location out = locations->Out();
    const uint32_t dex_pc = instruction_->GetDexPc();
    bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
    bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();

    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    if (must_resolve_type) {
      DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_codegen->GetGraph()->GetDexFile()));
      dex::TypeIndex type_index = cls_->GetTypeIndex();
      __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_));
      if (cls_->NeedsAccessCheck()) {
        CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
        x86_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
      } else {
        CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
        x86_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
      }
      // If we also must_do_clinit, the resolved type is now in the correct register.
    } else {
      DCHECK(must_do_clinit);
      Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
      x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source);
    }
    if (must_do_clinit) {
      x86_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
    }

    // Move the class to the desired location.
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      x86_codegen->Move32(out, Location::RegisterLocation(EAX));
    }
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadClassSlowPathX86"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86);
};

class TypeCheckSlowPathX86 : public SlowPathCode {
 public:
  TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal)
      : SlowPathCode(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());

    if (kPoisonHeapReferences &&
        instruction_->IsCheckCast() &&
        instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
      // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
      __ UnpoisonHeapReference(locations->InAt(1).AsRegister<Register>());
    }

    if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    x86_codegen->EmitParallelMoves(locations->InAt(0),
                                   Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                                   DataType::Type::kReference,
                                   locations->InAt(1),
                                   Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                                   DataType::Type::kReference);
    if (instruction_->IsInstanceOf()) {
      x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
    } else {
      DCHECK(instruction_->IsCheckCast());
      x86_codegen->InvokeRuntime(kQuickCheckInstanceOf,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      if (instruction_->IsInstanceOf()) {
        x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
      }
      RestoreLiveRegisters(codegen, locations);

      __ jmp(GetExitLabel());
    }
  }

  const char* GetDescription() const override { return "TypeCheckSlowPathX86"; }
  bool IsFatal() const override { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
};

class DeoptimizationSlowPathX86 : public SlowPathCode {
 public:
  explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConvention calling_convention;
    x86_codegen->Load32BitValue(
        calling_convention.GetRegisterAt(0),
        static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const override { return "DeoptimizationSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
};

class ArraySetSlowPathX86 : public SlowPathCode {
 public:
  explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        DataType::Type::kReference,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
        DataType::Type::kReference,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    x86_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ArraySetSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
};

// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
// reference does not get updated by this slow path after marking (see
// ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that).
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
 public:
  ReadBarrierMarkSlowPathX86(HInstruction* instruction,
                             Location ref,
                             bool unpoison_ref_before_marking)
      : SlowPathCode(instruction),
        ref_(ref),
        unpoison_ref_before_marking_(unpoison_ref_before_marking) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    Register ref_reg = ref_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsPredicatedInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsArraySet() ||
           instruction_->IsLoadClass() ||
           instruction_->IsLoadString() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_reg);
    }
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    DCHECK_NE(ref_reg, ESP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // and output in EAX):
    //
    //   EAX <- ref
    //   EAX <- ReadBarrierMark(EAX)
    //   ref <- EAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
};

// Slow path marking an object reference `ref` during a read barrier,
// and if needed, atomically updating the field `obj.field` in the
// object `obj` holding this reference after marking (contrary to
// ReadBarrierMarkSlowPathX86 above, which never tries to update
// `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
 public:
  ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction,
                                           Location ref,
                                           Register obj,
                                           const Address& field_addr,
                                           bool unpoison_ref_before_marking,
                                           Register temp)
      : SlowPathCode(instruction),
        ref_(ref),
        obj_(obj),
        field_addr_(field_addr),
        unpoison_ref_before_marking_(unpoison_ref_before_marking),
        temp_(temp) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const override {
    return "ReadBarrierMarkAndUpdateFieldSlowPathX86";
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    Register ref_reg = ref_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    // This slow path is only used by the UnsafeCASObject and VarHandle
    // compare-and-set/compare-and-exchange/get-and-set intrinsics.
    DCHECK((instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking and field updating slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    Intrinsics intrinsic = instruction_->AsInvoke()->GetIntrinsic();
    static constexpr auto kVarHandleCAS = mirror::VarHandle::AccessModeTemplate::kCompareAndSet;
    static constexpr auto kVarHandleGetAndSet =
        mirror::VarHandle::AccessModeTemplate::kGetAndUpdate;
    static constexpr auto kVarHandleCAX =
        mirror::VarHandle::AccessModeTemplate::kCompareAndExchange;
    DCHECK(intrinsic == Intrinsics::kUnsafeCASObject ||
           mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) == kVarHandleCAS ||
           mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) == kVarHandleGetAndSet ||
           mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) == kVarHandleCAX);

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_reg);
    }

    // Save the old (unpoisoned) reference.
    __ movl(temp_, ref_reg);

    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    DCHECK_NE(ref_reg, ESP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // and output in EAX):
    //
    //   EAX <- ref
    //   EAX <- ReadBarrierMark(EAX)
    //   ref <- EAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);

    // If the new reference is different from the old reference,
    // update the field in the holder (`*field_addr`).
    //
    // Note that this field could also hold a different object, if
    // another thread had concurrently changed it. In that case, the
    // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
    // operation below would abort the CAS, leaving the field as-is.
    NearLabel done;
    __ cmpl(temp_, ref_reg);
    __ j(kEqual, &done);

    // Update the holder's field atomically. This may fail if the
    // mutator updates it before us, but that is OK. This is achieved
    // using a strong compare-and-set (CAS) operation with relaxed
    // memory synchronization ordering, where the expected value is
    // the old reference and the desired value is the new reference.
    // This operation is implemented with a 32-bit LOCK CMPXCHG
    // instruction, which requires the expected value (the old
    // reference) to be in EAX. Save EAX beforehand, and move the
    // expected value (stored in `temp_`) into EAX.
    __ pushl(EAX);
    __ movl(EAX, temp_);

    // Convenience aliases.
    Register base = obj_;
    Register expected = EAX;
    Register value = ref_reg;

    bool base_equals_value = (base == value);
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // If `base` and `value` are the same register location, move
        // `value` to a temporary register. This way, poisoning
        // `value` won't invalidate `base`.
        value = temp_;
        __ movl(value, base);
      }

      // Check that the register allocator did not assign the location
      // of `expected` (EAX) to `value` nor to `base`, so that heap
      // poisoning (when enabled) works as intended below.
      // - If `value` were equal to `expected`, both references would
      //   be poisoned twice, meaning they would not be poisoned at
      //   all, as heap poisoning uses address negation.
      // - If `base` were equal to `expected`, poisoning `expected`
      //   would invalidate `base`.
      DCHECK_NE(value, expected);
      DCHECK_NE(base, expected);

      __ PoisonHeapReference(expected);
      __ PoisonHeapReference(value);
    }

    __ LockCmpxchgl(field_addr_, value);

    // If heap poisoning is enabled, we need to unpoison the values
    // that were poisoned earlier.
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // `value` has been moved to a temporary register, no need
        // to unpoison it.
      } else {
        __ UnpoisonHeapReference(value);
      }
      // No need to unpoison `expected` (EAX), as it will be overwritten below.
    }

    // Restore EAX.
    __ popl(EAX);

    __ Bind(&done);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // The register containing the object holding the marked object reference field.
  const Register obj_;
  // The address of the marked reference field. The base of this address must be `obj_`.
  const Address field_addr_;

  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  // A temporary register used to hold the old (pre-marking) reference.
  const Register temp_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86);
};

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
 public:
  ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction,
                                         Location out,
                                         Location ref,
                                         Location obj,
                                         uint32_t offset,
                                         Location index)
      : SlowPathCode(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    DCHECK(kEmitCompilerReadBarrier);
    // If `obj` is equal to `out` or `ref`, it means the initial object
    // has been overwritten by (or after) the heap object reference load
    // to be instrumented, e.g.:
    //
    //   __ movl(out, Address(out, offset));
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    Register reg_out = out_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsPredicatedInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute the actual memory offset and store it in `index`.
        Register index_reg = index_.AsRegister<Register>();
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
          // We are about to change the value of `index_reg` (see the
          // calls to art::x86::X86Assembler::shll and
          // art::x86::X86Assembler::AddImmediate below), but it has
          // not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function. So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier. Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          Register free_reg = FindAvailableCallerSaveRegister(codegen);
          __ movl(free_reg, index_reg);
          index_reg = free_reg;
          index = Location::RegisterLocation(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the scale
        // factor (2) cannot overflow in practice, as the runtime is
        // unable to allocate object arrays with a size larger than
        // 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ shll(index_reg, Immediate(TIMES_4));
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ AddImmediate(index_reg, Immediate(offset_));
      } else {
        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
        // intrinsics, `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset
        // to an object field within an object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0U);
        DCHECK(index_.IsRegisterPair());
        // UnsafeGet's offset location is a register pair, the low
        // part contains the correct offset.
        index = index_.ToLow();
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(ref_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                          DataType::Type::kReference,
                          nullptr);
    parallel_move.AddMove(obj_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                          DataType::Type::kReference,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
                            DataType::Type::kInt32,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
    }
    x86_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathX86"; }

 private:
  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    size_t ref = static_cast<int>(ref_.AsRegister<Register>());
    size_t obj = static_cast<int>(obj_.AsRegister<Register>());
    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return static_cast<Register>(i);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on x86
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free caller-save register";
    UNREACHABLE();
  }

  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
};

// Slow path generating a read barrier for a GC root.
class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
 public:
  ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
      : SlowPathCode(instruction), out_(out), root_(root) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    Register reg_out = out_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier for GC root slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
    x86_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
                               instruction_,
                               instruction_->GetDexPc(),
                               this);
    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86"; }

 private:
  const Location out_;
  const Location root_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86);
};

#undef __
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(GetAssembler())->  // NOLINT

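// Maps an HIR comparison condition to the x86 condition code used for
// signed integer comparisons.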
inline Condition X86Condition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kLess;
    case kCondLE: return kLessEqual;
    case kCondGT: return kGreater;
    case kCondGE: return kGreaterEqual;
    case kCondB:  return kBelow;
    case kCondBE: return kBelowEqual;
    case kCondA:  return kAbove;
    case kCondAE: return kAboveEqual;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

// Maps signed condition to unsigned condition and FP condition to x86 name.
inline Condition X86UnsignedOrFPCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    // Signed to unsigned, and FP to x86 name.
    case kCondLT: return kBelow;
    case kCondLE: return kBelowEqual;
    case kCondGT: return kAbove;
    case kCondGE: return kAboveEqual;
    // Unsigned remain unchanged.
    case kCondB:  return kBelow;
    case kCondBE: return kBelowEqual;
    case kCondA:  return kAbove;
    case kCondAE: return kAboveEqual;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const {
  stream << Register(reg);
}

void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
  stream << XmmRegister(reg);
}

const X86InstructionSetFeatures& CodeGeneratorX86::GetInstructionSetFeatures() const {
  return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
}

size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
  __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id));
  return kX86WordSize;
}

size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
  __ movl(static_cast<Register>(reg_id), Address(ESP, stack_index));
  return kX86WordSize;
}

size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  if (GetGraph()->HasSIMD()) {
    __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
  } else {
    __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
  }
  return GetSlowPathFPWidth();
}

size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  if (GetGraph()->HasSIMD()) {
    __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
  } else {
    __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
  }
  return GetSlowPathFPWidth();
}

void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint,
                                     HInstruction* instruction,
                                     uint32_t dex_pc,
                                     SlowPathCode* slow_path) {
  ValidateInvokeRuntime(entrypoint, instruction, slow_path);
  GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value());
  if (EntrypointRequiresStackMap(entrypoint)) {
    RecordPcInfo(instruction, dex_pc, slow_path);
  }
}

void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                                           HInstruction* instruction,
                                                           SlowPathCode* slow_path) {
  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
  GenerateInvokeRuntime(entry_point_offset);
}

void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
  __ fs()->call(Address::Absolute(entry_point_offset));
}

CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
                                   const CompilerOptions& compiler_options,
                                   OptimizingCompilerStats* stats)
    : CodeGenerator(graph,
                    kNumberOfCpuRegisters,
                    kNumberOfXmmRegisters,
                    kNumberOfRegisterPairs,
                    ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
                                        arraysize(kCoreCalleeSaves))
                        | (1 << kFakeReturnRegister),
                    0,
                    compiler_options,
                    stats),
      block_labels_(nullptr),
      location_builder_(graph, this),
      instruction_visitor_(graph, this),
      move_resolver_(graph->GetAllocator(), this),
      assembler_(graph->GetAllocator()),
      boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      constant_area_start_(-1),
      fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      method_address_offset_(std::less<uint32_t>(),
                             graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
  // Use a fake return address register to mimic Quick.
  AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}

void CodeGeneratorX86::SetupBlockedRegisters() const {
  // Stack register is always reserved.
  blocked_core_registers_[ESP] = true;
}

InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
    : InstructionCodeGenerator(graph, codegen),
      assembler_(codegen->GetAssembler()),
      codegen_(codegen) {}

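// Maps an x86 core register to its DWARF register number for CFI emission.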
static dwarf::Reg DWARFReg(Register reg) {
  return dwarf::Reg::X86Core(static_cast<int>(reg));
}

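// Bumps the method's hotness counter. At frame entry the method pointer is
// already in kMethodRegisterArgument; elsewhere it is reloaded from the stack.
// When compiling baseline code under a JIT, the ProfilingInfo counter is also
// updated and kQuickCompileOptimized is invoked once the counter wraps.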
void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) {
  if (GetCompilerOptions().CountHotnessInCompiledCode()) {
    Register reg = EAX;
    if (is_frame_entry) {
      reg = kMethodRegisterArgument;
    } else {
      __ pushl(EAX);
      __ cfi().AdjustCFAOffset(4);
      __ movl(EAX, Address(ESP, kX86WordSize));
    }
    NearLabel overflow;
    __ cmpw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
            Immediate(ArtMethod::MaxCounter()));
    __ j(kEqual, &overflow);
    __ addw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
            Immediate(1));
    __ Bind(&overflow);
    if (!is_frame_entry) {
      __ popl(EAX);
      __ cfi().AdjustCFAOffset(-4);
    }
  }

  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
    ScopedProfilingInfoUse spiu(
        Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
    ProfilingInfo* info = spiu.GetProfilingInfo();
    if (info != nullptr) {
      uint32_t address = reinterpret_cast32<uint32_t>(info);
      NearLabel done;
      if (HasEmptyFrame()) {
        CHECK(is_frame_entry);
        // Alignment
        IncreaseFrame(8);
        // We need a temporary. The stub also expects the method at bottom of stack.
        __ pushl(EAX);
        __ cfi().AdjustCFAOffset(4);
        __ movl(EAX, Immediate(address));
        __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
                Immediate(1));
        __ andw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
                Immediate(interpreter::kTieredHotnessMask));
        __ j(kNotZero, &done);
        GenerateInvokeRuntime(
            GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
        __ Bind(&done);
        // We don't strictly require to restore EAX, but this makes the generated
        // code easier to reason about.
        __ popl(EAX);
        __ cfi().AdjustCFAOffset(-4);
        DecreaseFrame(8);
      } else {
        if (!RequiresCurrentMethod()) {
          CHECK(is_frame_entry);
          __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
        }
        // We need a temporary.
        __ pushl(EAX);
        __ cfi().AdjustCFAOffset(4);
        __ movl(EAX, Immediate(address));
        __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
                Immediate(1));
        __ popl(EAX);  // Put stack as expected before exiting or calling stub.
        __ cfi().AdjustCFAOffset(-4);
        __ j(kCarryClear, &done);
        GenerateInvokeRuntime(
            GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
        __ Bind(&done);
      }
    }
  }
}

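// Emits the method prologue: an implicit stack-overflow probe (a read below
// ESP), pushes of the allocated core callee-save registers, the frame
// allocation, and a store of the current ArtMethod* at the bottom of the frame.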
void CodeGeneratorX86::GenerateFrameEntry() {
  __ cfi().SetCurrentCFAOffset(kX86WordSize);  // return address
  __ Bind(&frame_entry_label_);
  bool skip_overflow_check =
      IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
  DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());

  if (!skip_overflow_check) {
    size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86);
    __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes)));
    RecordPcInfo(nullptr, 0);
  }

  if (!HasEmptyFrame()) {
    for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
      Register reg = kCoreCalleeSaves[i];
      if (allocated_registers_.ContainsCoreRegister(reg)) {
        __ pushl(reg);
        __ cfi().AdjustCFAOffset(kX86WordSize);
        __ cfi().RelOffset(DWARFReg(reg), 0);
      }
    }

    int adjust = GetFrameSize() - FrameEntrySpillSize();
    IncreaseFrame(adjust);
    // Save the current method if we need it. Note that we do not
    // do this in HCurrentMethod, as the instruction might have been removed
    // in the SSA graph.
    if (RequiresCurrentMethod()) {
      __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
    }

    if (GetGraph()->HasShouldDeoptimizeFlag()) {
      // Initialize should_deoptimize flag to 0.
      __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
    }
  }

  MaybeIncrementHotness(/* is_frame_entry= */ true);
}

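// Emits the method epilogue: tears down the frame, pops the callee-save
// registers, and returns. CFI state is remembered and restored around the
// epilogue so unwind info stays valid for any code emitted after the return.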
void CodeGeneratorX86::GenerateFrameExit() {
  __ cfi().RememberState();
  if (!HasEmptyFrame()) {
    int adjust = GetFrameSize() - FrameEntrySpillSize();
    DecreaseFrame(adjust);

    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
      Register reg = kCoreCalleeSaves[i];
      if (allocated_registers_.ContainsCoreRegister(reg)) {
        __ popl(reg);
        __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize));
        __ cfi().Restore(DWARFReg(reg));
      }
    }
  }
  __ ret();
  __ cfi().RestoreState();
  __ cfi().DefCFAOffset(GetFrameSize());
}

void CodeGeneratorX86::Bind(HBasicBlock* block) {
  __ Bind(GetLabelOf(block));
}

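// Managed ABI return locations: integral and reference results in EAX,
// longs in the EAX:EDX pair, and floating-point results in XMM0.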
Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
  switch (type) {
    case DataType::Type::kReference:
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kUint32:
    case DataType::Type::kInt32:
      return Location::RegisterLocation(EAX);

    case DataType::Type::kUint64:
    case DataType::Type::kInt64:
      return Location::RegisterPairLocation(EAX, EDX);

    case DataType::Type::kVoid:
      return Location::NoLocation();

    case DataType::Type::kFloat64:
    case DataType::Type::kFloat32:
      return Location::FpuRegisterLocation(XMM0);
  }

  UNREACHABLE();
}

Location InvokeDexCallingConventionVisitorX86::GetMethodLocation() const {
  return Location::RegisterLocation(kMethodRegisterArgument);
}

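// Assigns the next argument location. Register arguments still advance the
// stack index because the caller always reserves outgoing stack slots, so
// stack offsets stay in sync with the argument position.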
Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
  switch (type) {
    case DataType::Type::kReference:
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32: {
      uint32_t index = gp_index_++;
      stack_index_++;
      if (index < calling_convention.GetNumberOfRegisters()) {
        return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
      } else {
        return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
      }
    }

    case DataType::Type::kInt64: {
      uint32_t index = gp_index_;
      gp_index_ += 2;
      stack_index_ += 2;
      if (index + 1 < calling_convention.GetNumberOfRegisters()) {
        X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(
            calling_convention.GetRegisterPairAt(index));
        return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
      } else {
        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
      }
    }

    case DataType::Type::kFloat32: {
      uint32_t index = float_index_++;
      stack_index_++;
      if (index < calling_convention.GetNumberOfFpuRegisters()) {
        return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
      } else {
        return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
      }
    }

    case DataType::Type::kFloat64: {
      uint32_t index = float_index_++;
      stack_index_ += 2;
      if (index < calling_convention.GetNumberOfFpuRegisters()) {
        return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
      } else {
        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
      }
    }

    case DataType::Type::kUint32:
    case DataType::Type::kUint64:
    case DataType::Type::kVoid:
      LOG(FATAL) << "Unexpected parameter type " << type;
      UNREACHABLE();
  }
  return Location::NoLocation();
}

Location CriticalNativeCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
  DCHECK_NE(type, DataType::Type::kReference);

  Location location;
  if (DataType::Is64BitType(type)) {
    location = Location::DoubleStackSlot(stack_offset_);
    stack_offset_ += 2 * kFramePointerSize;
  } else {
    location = Location::StackSlot(stack_offset_);
    stack_offset_ += kFramePointerSize;
  }
  if (for_register_allocation_) {
    location = Location::Any();
  }
  return location;
}

Location CriticalNativeCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
  // We perform conversion to the managed ABI return register after the call if needed.
  InvokeDexCallingConventionVisitorX86 dex_calling_convention;
  return dex_calling_convention.GetReturnLocation(type);
}

Location CriticalNativeCallingConventionVisitorX86::GetMethodLocation() const {
  // Pass the method in the hidden argument EAX.
  return Location::RegisterLocation(EAX);
}

Move32(Location destination,Location source)1353 void CodeGeneratorX86::Move32(Location destination, Location source) {
1354 if (source.Equals(destination)) {
1355 return;
1356 }
1357 if (destination.IsRegister()) {
1358 if (source.IsRegister()) {
1359 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
1360 } else if (source.IsFpuRegister()) {
1361 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
1362 } else if (source.IsConstant()) {
1363 int32_t value = GetInt32ValueOf(source.GetConstant());
1364 __ movl(destination.AsRegister<Register>(), Immediate(value));
1365 } else {
1366 DCHECK(source.IsStackSlot());
1367 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
1368 }
1369 } else if (destination.IsFpuRegister()) {
1370 if (source.IsRegister()) {
1371 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
1372 } else if (source.IsFpuRegister()) {
1373 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1374 } else {
1375 DCHECK(source.IsStackSlot());
1376 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1377 }
1378 } else {
1379 DCHECK(destination.IsStackSlot()) << destination;
1380 if (source.IsRegister()) {
1381 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
1382 } else if (source.IsFpuRegister()) {
1383 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1384 } else if (source.IsConstant()) {
1385 HConstant* constant = source.GetConstant();
1386 int32_t value = GetInt32ValueOf(constant);
1387 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
1388 } else {
1389 DCHECK(source.IsStackSlot());
1390 __ pushl(Address(ESP, source.GetStackIndex()));
1391 __ popl(Address(ESP, destination.GetStackIndex()));
1392 }
1393 }
1394 }
1395
Move64(Location destination,Location source)1396 void CodeGeneratorX86::Move64(Location destination, Location source) {
1397 if (source.Equals(destination)) {
1398 return;
1399 }
1400 if (destination.IsRegisterPair()) {
1401 if (source.IsRegisterPair()) {
1402 EmitParallelMoves(
1403 Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
1404 Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
1405 DataType::Type::kInt32,
1406 Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
1407 Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
1408 DataType::Type::kInt32);
1409 } else if (source.IsFpuRegister()) {
1410 XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
1411 __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
1412 __ psrlq(src_reg, Immediate(32));
1413 __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
1414 } else {
1415 // No conflict possible, so just do the moves.
1416 DCHECK(source.IsDoubleStackSlot());
1417 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
1418 __ movl(destination.AsRegisterPairHigh<Register>(),
1419 Address(ESP, source.GetHighStackIndex(kX86WordSize)));
1420 }
1421 } else if (destination.IsFpuRegister()) {
1422 if (source.IsFpuRegister()) {
1423 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1424 } else if (source.IsDoubleStackSlot()) {
1425 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1426 } else if (source.IsRegisterPair()) {
1427 size_t elem_size = DataType::Size(DataType::Type::kInt32);
1428 // Push the 2 source registers to the stack.
1429 __ pushl(source.AsRegisterPairHigh<Register>());
1430 __ cfi().AdjustCFAOffset(elem_size);
1431 __ pushl(source.AsRegisterPairLow<Register>());
1432 __ cfi().AdjustCFAOffset(elem_size);
1433 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
1434 // And remove the temporary stack space we allocated.
1435 DecreaseFrame(2 * elem_size);
1436 } else {
1437 LOG(FATAL) << "Unimplemented";
1438 }
1439 } else {
1440 DCHECK(destination.IsDoubleStackSlot()) << destination;
1441 if (source.IsRegisterPair()) {
1442 // No conflict possible, so just do the moves.
1443 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
1444 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1445 source.AsRegisterPairHigh<Register>());
1446 } else if (source.IsFpuRegister()) {
1447 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1448 } else if (source.IsConstant()) {
1449 HConstant* constant = source.GetConstant();
1450 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1451 int64_t value = GetInt64ValueOf(constant);
1452 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
1453 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1454 Immediate(High32Bits(value)));
1455 } else {
1456 DCHECK(source.IsDoubleStackSlot()) << source;
1457 EmitParallelMoves(
1458 Location::StackSlot(source.GetStackIndex()),
1459 Location::StackSlot(destination.GetStackIndex()),
1460 DataType::Type::kInt32,
1461 Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
1462 Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)),
1463 DataType::Type::kInt32);
1464 }
1465 }
1466 }
1467
CreateAddress(Register base,Register index=Register::kNoRegister,ScaleFactor scale=TIMES_1,int32_t disp=0)1468 static Address CreateAddress(Register base,
1469 Register index = Register::kNoRegister,
1470 ScaleFactor scale = TIMES_1,
1471 int32_t disp = 0) {
1472 if (index == Register::kNoRegister) {
1473 return Address(base, disp);
1474 }
1475
1476 return Address(base, index, scale, disp);
1477 }
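
// For illustration (a sketch, not part of the original file): the two forms
// correspond to the standard x86 addressing modes, e.g.
//   CreateAddress(ESI)                    -> [esi]
//   CreateAddress(ESI, ECX, TIMES_4, 12)  -> [esi + ecx*4 + 12]
// where the scaled-index form is the shape used by array accesses with a
// variable index.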

void CodeGeneratorX86::LoadFromMemoryNoBarrier(DataType::Type dst_type,
                                               Location dst,
                                               Address src,
                                               XmmRegister temp,
                                               bool is_atomic_load) {
  switch (dst_type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
      __ movzxb(dst.AsRegister<Register>(), src);
      break;
    case DataType::Type::kInt8:
      __ movsxb(dst.AsRegister<Register>(), src);
      break;
    case DataType::Type::kInt16:
      __ movsxw(dst.AsRegister<Register>(), src);
      break;
    case DataType::Type::kUint16:
      __ movzxw(dst.AsRegister<Register>(), src);
      break;
    case DataType::Type::kInt32:
      __ movl(dst.AsRegister<Register>(), src);
      break;
    case DataType::Type::kInt64: {
      if (is_atomic_load) {
        __ movsd(temp, src);
        __ movd(dst.AsRegisterPairLow<Register>(), temp);
        __ psrlq(temp, Immediate(32));
        __ movd(dst.AsRegisterPairHigh<Register>(), temp);
      } else {
        DCHECK_NE(src.GetBaseRegister(), dst.AsRegisterPairLow<Register>());
        Address src_high = src.displaceBy(kX86WordSize);
        __ movl(dst.AsRegisterPairLow<Register>(), src);
        __ movl(dst.AsRegisterPairHigh<Register>(), src_high);
      }
      break;
    }
    case DataType::Type::kFloat32:
      __ movss(dst.AsFpuRegister<XmmRegister>(), src);
      break;
    case DataType::Type::kFloat64:
      __ movsd(dst.AsFpuRegister<XmmRegister>(), src);
      break;
    case DataType::Type::kReference:
      __ movl(dst.AsRegister<Register>(), src);
      __ MaybeUnpoisonHeapReference(dst.AsRegister<Register>());
      break;
    default:
      LOG(FATAL) << "Unreachable type " << dst_type;
  }
}
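
// A note on the kInt64 atomic path above: an aligned 8-byte SSE load is a
// single memory access, so `movsd` plus two `movd`/`psrlq` extractions yields
// an atomic load into a register pair. Sketch of the emitted sequence (with
// placeholder registers):
//   movsd xmm_temp, [base + disp]  // one 8-byte read
//   movd  eax, xmm_temp            // low 32 bits
//   psrlq xmm_temp, 32
//   movd  edx, xmm_temp            // high 32 bits
// The non-atomic path issues two 32-bit loads instead, which is why it must
// not clobber the base register with the low word before reading the high
// word (hence the DCHECK_NE above).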

void CodeGeneratorX86::MoveToMemory(DataType::Type src_type,
                                    Location src,
                                    Register dst_base,
                                    Register dst_index,
                                    ScaleFactor dst_scale,
                                    int32_t dst_disp) {
  DCHECK(dst_base != Register::kNoRegister);
  Address dst = CreateAddress(dst_base, dst_index, dst_scale, dst_disp);

  switch (src_type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8: {
      if (src.IsConstant()) {
        __ movb(dst, Immediate(CodeGenerator::GetInt8ValueOf(src.GetConstant())));
      } else {
        __ movb(dst, src.AsRegister<ByteRegister>());
      }
      break;
    }
    case DataType::Type::kUint16:
    case DataType::Type::kInt16: {
      if (src.IsConstant()) {
        __ movw(dst, Immediate(CodeGenerator::GetInt16ValueOf(src.GetConstant())));
      } else {
        __ movw(dst, src.AsRegister<Register>());
      }
      break;
    }
    case DataType::Type::kUint32:
    case DataType::Type::kInt32: {
      if (src.IsConstant()) {
        int32_t v = CodeGenerator::GetInt32ValueOf(src.GetConstant());
        __ movl(dst, Immediate(v));
      } else {
        __ movl(dst, src.AsRegister<Register>());
      }
      break;
    }
    case DataType::Type::kUint64:
    case DataType::Type::kInt64: {
      Address dst_next_4_bytes = CreateAddress(dst_base, dst_index, dst_scale, dst_disp + 4);
      if (src.IsConstant()) {
        int64_t v = CodeGenerator::GetInt64ValueOf(src.GetConstant());
        __ movl(dst, Immediate(Low32Bits(v)));
        __ movl(dst_next_4_bytes, Immediate(High32Bits(v)));
      } else {
        __ movl(dst, src.AsRegisterPairLow<Register>());
        __ movl(dst_next_4_bytes, src.AsRegisterPairHigh<Register>());
      }
      break;
    }
    case DataType::Type::kFloat32: {
      if (src.IsConstant()) {
        int32_t v = CodeGenerator::GetInt32ValueOf(src.GetConstant());
        __ movl(dst, Immediate(v));
      } else {
        __ movss(dst, src.AsFpuRegister<XmmRegister>());
      }
      break;
    }
    case DataType::Type::kFloat64: {
      Address dst_next_4_bytes = CreateAddress(dst_base, dst_index, dst_scale, dst_disp + 4);
      if (src.IsConstant()) {
        int64_t v = CodeGenerator::GetInt64ValueOf(src.GetConstant());
        __ movl(dst, Immediate(Low32Bits(v)));
        __ movl(dst_next_4_bytes, Immediate(High32Bits(v)));
      } else {
        __ movsd(dst, src.AsFpuRegister<XmmRegister>());
      }
      break;
    }
    case DataType::Type::kVoid:
    case DataType::Type::kReference:
      LOG(FATAL) << "Unreachable type " << src_type;
  }
}

void CodeGeneratorX86::MoveConstant(Location location, int32_t value) {
  DCHECK(location.IsRegister());
  __ movl(location.AsRegister<Register>(), Immediate(value));
}

void CodeGeneratorX86::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
  HParallelMove move(GetGraph()->GetAllocator());
  if (dst_type == DataType::Type::kInt64 && !src.IsConstant() && !src.IsFpuRegister()) {
    move.AddMove(src.ToLow(), dst.ToLow(), DataType::Type::kInt32, nullptr);
    move.AddMove(src.ToHigh(), dst.ToHigh(), DataType::Type::kInt32, nullptr);
  } else {
    move.AddMove(src, dst, dst_type, nullptr);
  }
  GetMoveResolver()->EmitNativeCode(&move);
}

void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) {
  if (location.IsRegister()) {
    locations->AddTemp(location);
  } else if (location.IsRegisterPair()) {
    locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>()));
    locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>()));
  } else {
    UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
  }
}

void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) {
  if (successor->IsExitBlock()) {
    DCHECK(got->GetPrevious()->AlwaysThrows());
    return;  // no code needed
  }

  HBasicBlock* block = got->GetBlock();
  HInstruction* previous = got->GetPrevious();

  HLoopInformation* info = block->GetLoopInformation();
  if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
    codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
    GenerateSuspendCheck(info->GetSuspendCheck(), successor);
    return;
  }

  if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
    GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
  }
  if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
    __ jmp(codegen_->GetLabelOf(successor));
  }
}

void LocationsBuilderX86::VisitGoto(HGoto* got) {
  got->SetLocations(nullptr);
}

void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) {
  HandleGoto(got, got->GetSuccessor());
}

void LocationsBuilderX86::VisitTryBoundary(HTryBoundary* try_boundary) {
  try_boundary->SetLocations(nullptr);
}

void InstructionCodeGeneratorX86::VisitTryBoundary(HTryBoundary* try_boundary) {
  HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
  if (!successor->IsExitBlock()) {
    HandleGoto(try_boundary, successor);
  }
}

void LocationsBuilderX86::VisitExit(HExit* exit) {
  exit->SetLocations(nullptr);
}

void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
}

template<class LabelType>
void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
                                                  LabelType* true_label,
                                                  LabelType* false_label) {
  if (cond->IsFPConditionTrueIfNaN()) {
    __ j(kUnordered, true_label);
  } else if (cond->IsFPConditionFalseIfNaN()) {
    __ j(kUnordered, false_label);
  }
  __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label);
}
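
// Worked example (illustrative only): for a float comparison `a < b`,
// X86UnsignedOrFPCondition(kCondLT) is kBelow and `<` is false on NaN, so
// after the ucomiss the jumps come out as
//   jp  false_label  // unordered (NaN) => condition is false
//   jb  true_label   // CF is set by ucomiss when a < b
// For `a != b`, which is true on NaN, the unordered jump targets true_label
// instead.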

template<class LabelType>
void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
                                                               LabelType* true_label,
                                                               LabelType* false_label) {
  LocationSummary* locations = cond->GetLocations();
  Location left = locations->InAt(0);
  Location right = locations->InAt(1);
  IfCondition if_cond = cond->GetCondition();

  Register left_high = left.AsRegisterPairHigh<Register>();
  Register left_low = left.AsRegisterPairLow<Register>();
  IfCondition true_high_cond = if_cond;
  IfCondition false_high_cond = cond->GetOppositeCondition();
  Condition final_condition = X86UnsignedOrFPCondition(if_cond);  // unsigned on lower part

  // Set the conditions for the test, remembering that == needs to be
  // decided using the low words.
  switch (if_cond) {
    case kCondEQ:
    case kCondNE:
      // Nothing to do.
      break;
    case kCondLT:
      false_high_cond = kCondGT;
      break;
    case kCondLE:
      true_high_cond = kCondLT;
      break;
    case kCondGT:
      false_high_cond = kCondLT;
      break;
    case kCondGE:
      true_high_cond = kCondGT;
      break;
    case kCondB:
      false_high_cond = kCondA;
      break;
    case kCondBE:
      true_high_cond = kCondB;
      break;
    case kCondA:
      false_high_cond = kCondB;
      break;
    case kCondAE:
      true_high_cond = kCondA;
      break;
  }

  if (right.IsConstant()) {
    int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
    int32_t val_high = High32Bits(value);
    int32_t val_low = Low32Bits(value);

    codegen_->Compare32BitValue(left_high, val_high);
    if (if_cond == kCondNE) {
      __ j(X86Condition(true_high_cond), true_label);
    } else if (if_cond == kCondEQ) {
      __ j(X86Condition(false_high_cond), false_label);
    } else {
      __ j(X86Condition(true_high_cond), true_label);
      __ j(X86Condition(false_high_cond), false_label);
    }
    // Must be equal high, so compare the lows.
    codegen_->Compare32BitValue(left_low, val_low);
  } else if (right.IsRegisterPair()) {
    Register right_high = right.AsRegisterPairHigh<Register>();
    Register right_low = right.AsRegisterPairLow<Register>();

    __ cmpl(left_high, right_high);
    if (if_cond == kCondNE) {
      __ j(X86Condition(true_high_cond), true_label);
    } else if (if_cond == kCondEQ) {
      __ j(X86Condition(false_high_cond), false_label);
    } else {
      __ j(X86Condition(true_high_cond), true_label);
      __ j(X86Condition(false_high_cond), false_label);
    }
    // Must be equal high, so compare the lows.
    __ cmpl(left_low, right_low);
  } else {
    DCHECK(right.IsDoubleStackSlot());
    __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
    if (if_cond == kCondNE) {
      __ j(X86Condition(true_high_cond), true_label);
    } else if (if_cond == kCondEQ) {
      __ j(X86Condition(false_high_cond), false_label);
    } else {
      __ j(X86Condition(true_high_cond), true_label);
      __ j(X86Condition(false_high_cond), false_label);
    }
    // Must be equal high, so compare the lows.
    __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
  }
  // The last comparison might be unsigned.
  __ j(final_condition, true_label);
}
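
// Worked example (a sketch of the emitted control flow): for `left < right`
// on kInt64 values held in register pairs, the code above produces
//   cmpl left_high, right_high
//   jl   true_label    // high words already decide <
//   jg   false_label   // high words already decide >
//   cmpl left_low, right_low
//   jb   true_label    // equal highs: unsigned compare of the low words
// and falls through to the false side otherwise.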

void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
                                                    Location rhs,
                                                    HInstruction* insn,
                                                    bool is_double) {
  HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable();
  if (is_double) {
    if (rhs.IsFpuRegister()) {
      __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
    } else if (const_area != nullptr) {
      DCHECK(const_area->IsEmittedAtUseSite());
      __ ucomisd(lhs.AsFpuRegister<XmmRegister>(),
                 codegen_->LiteralDoubleAddress(
                     const_area->GetConstant()->AsDoubleConstant()->GetValue(),
                     const_area->GetBaseMethodAddress(),
                     const_area->GetLocations()->InAt(0).AsRegister<Register>()));
    } else {
      DCHECK(rhs.IsDoubleStackSlot());
      __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
    }
  } else {
    if (rhs.IsFpuRegister()) {
      __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
    } else if (const_area != nullptr) {
      DCHECK(const_area->IsEmittedAtUseSite());
      __ ucomiss(lhs.AsFpuRegister<XmmRegister>(),
                 codegen_->LiteralFloatAddress(
                     const_area->GetConstant()->AsFloatConstant()->GetValue(),
                     const_area->GetBaseMethodAddress(),
                     const_area->GetLocations()->InAt(0).AsRegister<Register>()));
    } else {
      DCHECK(rhs.IsStackSlot());
      __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
    }
  }
}

template<class LabelType>
void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
                                                               LabelType* true_target_in,
                                                               LabelType* false_target_in) {
  // Generated branching requires both targets to be explicit. If either of the
  // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
  LabelType fallthrough_target;
  LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
  LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;

  LocationSummary* locations = condition->GetLocations();
  Location left = locations->InAt(0);
  Location right = locations->InAt(1);

  DataType::Type type = condition->InputAt(0)->GetType();
  switch (type) {
    case DataType::Type::kInt64:
      GenerateLongComparesAndJumps(condition, true_target, false_target);
      break;
    case DataType::Type::kFloat32:
      GenerateFPCompare(left, right, condition, false);
      GenerateFPJumps(condition, true_target, false_target);
      break;
    case DataType::Type::kFloat64:
      GenerateFPCompare(left, right, condition, true);
      GenerateFPJumps(condition, true_target, false_target);
      break;
    default:
      LOG(FATAL) << "Unexpected compare type " << type;
  }

  if (false_target != &fallthrough_target) {
    __ jmp(false_target);
  }

  if (fallthrough_target.IsLinked()) {
    __ Bind(&fallthrough_target);
  }
}

static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
  // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
  // are set only strictly before `branch`. We can't use the eflags on long/FP
  // conditions if they are materialized due to the complex branching.
  return cond->IsCondition() &&
         cond->GetNext() == branch &&
         cond->InputAt(0)->GetType() != DataType::Type::kInt64 &&
         !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
}

template<class LabelType>
void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
                                                        size_t condition_input_index,
                                                        LabelType* true_target,
                                                        LabelType* false_target) {
  HInstruction* cond = instruction->InputAt(condition_input_index);

  if (true_target == nullptr && false_target == nullptr) {
    // Nothing to do. The code always falls through.
    return;
  } else if (cond->IsIntConstant()) {
    // Constant condition, statically compared against "true" (integer value 1).
    if (cond->AsIntConstant()->IsTrue()) {
      if (true_target != nullptr) {
        __ jmp(true_target);
      }
    } else {
      DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
      if (false_target != nullptr) {
        __ jmp(false_target);
      }
    }
    return;
  }

  // The following code generates these patterns:
  //   (1) true_target == nullptr && false_target != nullptr
  //         - opposite condition true => branch to false_target
  //   (2) true_target != nullptr && false_target == nullptr
  //         - condition true => branch to true_target
  //   (3) true_target != nullptr && false_target != nullptr
  //         - condition true => branch to true_target
  //         - branch to false_target
  if (IsBooleanValueOrMaterializedCondition(cond)) {
    if (AreEflagsSetFrom(cond, instruction)) {
      if (true_target == nullptr) {
        __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target);
      } else {
        __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
      }
    } else {
      // Materialized condition, compare against 0.
      Location lhs = instruction->GetLocations()->InAt(condition_input_index);
      if (lhs.IsRegister()) {
        __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
      } else {
        __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
      }
      if (true_target == nullptr) {
        __ j(kEqual, false_target);
      } else {
        __ j(kNotEqual, true_target);
      }
    }
  } else {
    // Condition has not been materialized, use its inputs as the comparison and
    // its condition as the branch condition.
    HCondition* condition = cond->AsCondition();

    // If this is a long or FP comparison that has been folded into
    // the HCondition, generate the comparison directly.
    DataType::Type type = condition->InputAt(0)->GetType();
    if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
      GenerateCompareTestAndBranch(condition, true_target, false_target);
      return;
    }

    Location lhs = condition->GetLocations()->InAt(0);
    Location rhs = condition->GetLocations()->InAt(1);
    // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
    codegen_->GenerateIntCompare(lhs, rhs);
    if (true_target == nullptr) {
      __ j(X86Condition(condition->GetOppositeCondition()), false_target);
    } else {
      __ j(X86Condition(condition->GetCondition()), true_target);
    }
  }

  // If neither branch falls through (case 3), the conditional branch to `true_target`
  // was already emitted (case 2) and we need to emit a jump to `false_target`.
  if (true_target != nullptr && false_target != nullptr) {
    __ jmp(false_target);
  }
}

void LocationsBuilderX86::VisitIf(HIf* if_instr) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
  if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
    locations->SetInAt(0, Location::Any());
  }
}

void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
  HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
  HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
  Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
      nullptr : codegen_->GetLabelOf(true_successor);
  Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
      nullptr : codegen_->GetLabelOf(false_successor);
  GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
}

void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
  LocationSummary* locations = new (GetGraph()->GetAllocator())
      LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
  InvokeRuntimeCallingConvention calling_convention;
  RegisterSet caller_saves = RegisterSet::Empty();
  caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetCustomSlowPathCallerSaves(caller_saves);
  if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
    locations->SetInAt(0, Location::Any());
  }
}

void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
  SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
  GenerateTestAndBranch<Label>(deoptimize,
                               /* condition_input_index= */ 0,
                               slow_path->GetEntryLabel(),
                               /* false_target= */ nullptr);
}

void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
  LocationSummary* locations = new (GetGraph()->GetAllocator())
      LocationSummary(flag, LocationSummary::kNoCall);
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
  __ movl(flag->GetLocations()->Out().AsRegister<Register>(),
          Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
}

static bool SelectCanUseCMOV(HSelect* select) {
  // There are no conditional move instructions for XMMs.
  if (DataType::IsFloatingPointType(select->GetType())) {
    return false;
  }

  // An FP condition doesn't generate the single CC (condition code) that we need.
  // In 32-bit mode, a long condition doesn't generate a single CC either.
  HInstruction* condition = select->GetCondition();
  if (condition->IsCondition()) {
    DataType::Type compare_type = condition->InputAt(0)->GetType();
    if (compare_type == DataType::Type::kInt64 ||
        DataType::IsFloatingPointType(compare_type)) {
      return false;
    }
  }

  // We can generate a CMOV for this Select.
  return true;
}

void LocationsBuilderX86::VisitSelect(HSelect* select) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
  if (DataType::IsFloatingPointType(select->GetType())) {
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetInAt(1, Location::Any());
  } else {
    locations->SetInAt(0, Location::RequiresRegister());
    if (SelectCanUseCMOV(select)) {
      if (select->InputAt(1)->IsConstant()) {
        // Cmov can't handle a constant value.
        locations->SetInAt(1, Location::RequiresRegister());
      } else {
        locations->SetInAt(1, Location::Any());
      }
    } else {
      locations->SetInAt(1, Location::Any());
    }
  }
  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
    locations->SetInAt(2, Location::RequiresRegister());
  }
  locations->SetOut(Location::SameAsFirstInput());
}

void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
  LocationSummary* locations = select->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  if (SelectCanUseCMOV(select)) {
    // If both the condition and the source types are integer, we can generate
    // a CMOV to implement Select.

    HInstruction* select_condition = select->GetCondition();
    Condition cond = kNotEqual;

    // Figure out how to test the 'condition'.
    if (select_condition->IsCondition()) {
      HCondition* condition = select_condition->AsCondition();
      if (!condition->IsEmittedAtUseSite()) {
        // This was a previously materialized condition.
        // Can we use the existing condition code?
        if (AreEflagsSetFrom(condition, select)) {
          // Materialization was the previous instruction. Condition codes are right.
          cond = X86Condition(condition->GetCondition());
        } else {
          // No, we have to recreate the condition code.
          Register cond_reg = locations->InAt(2).AsRegister<Register>();
          __ testl(cond_reg, cond_reg);
        }
      } else {
        // We can't handle FP or long here.
        DCHECK_NE(condition->InputAt(0)->GetType(), DataType::Type::kInt64);
        DCHECK(!DataType::IsFloatingPointType(condition->InputAt(0)->GetType()));
        LocationSummary* cond_locations = condition->GetLocations();
        codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
        cond = X86Condition(condition->GetCondition());
      }
    } else {
      // Must be a Boolean condition, which needs to be compared to 0.
      Register cond_reg = locations->InAt(2).AsRegister<Register>();
      __ testl(cond_reg, cond_reg);
    }

    // If the condition is true, overwrite the output, which already contains false.
    Location false_loc = locations->InAt(0);
    Location true_loc = locations->InAt(1);
    if (select->GetType() == DataType::Type::kInt64) {
      // 64 bit conditional move.
      Register false_high = false_loc.AsRegisterPairHigh<Register>();
      Register false_low = false_loc.AsRegisterPairLow<Register>();
      if (true_loc.IsRegisterPair()) {
        __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
        __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
      } else {
        __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
        __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
      }
    } else {
      // 32 bit conditional move.
      Register false_reg = false_loc.AsRegister<Register>();
      if (true_loc.IsRegister()) {
        __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
      } else {
        __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
      }
    }
  } else {
    NearLabel false_target;
    GenerateTestAndBranch<NearLabel>(
        select, /* condition_input_index= */ 2, /* true_target= */ nullptr, &false_target);
    codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
    __ Bind(&false_target);
  }
}
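
// Worked example (illustrative): for an integer HSelect whose condition was
// materialized into a register, the CMOV path above boils down to
//   testl cond_reg, cond_reg   // sets ZF from the boolean
//   cmovne out, true_reg       // out already holds the false value
// which replaces a compare-and-branch diamond with a single branch-free move.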

void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
  new (GetGraph()->GetAllocator()) LocationSummary(info);
}

void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo*) {
  // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
}

void CodeGeneratorX86::IncreaseFrame(size_t adjustment) {
  __ subl(ESP, Immediate(adjustment));
  __ cfi().AdjustCFAOffset(adjustment);
}

void CodeGeneratorX86::DecreaseFrame(size_t adjustment) {
  __ addl(ESP, Immediate(adjustment));
  __ cfi().AdjustCFAOffset(-adjustment);
}

void CodeGeneratorX86::GenerateNop() {
  __ nop();
}

void LocationsBuilderX86::HandleCondition(HCondition* cond) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
  // Handle the long/FP comparisons made in instruction simplification.
  switch (cond->InputAt(0)->GetType()) {
    case DataType::Type::kInt64: {
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::Any());
      if (!cond->IsEmittedAtUseSite()) {
        locations->SetOut(Location::RequiresRegister());
      }
      break;
    }
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64: {
      locations->SetInAt(0, Location::RequiresFpuRegister());
      if (cond->InputAt(1)->IsX86LoadFromConstantTable()) {
        DCHECK(cond->InputAt(1)->IsEmittedAtUseSite());
      } else if (cond->InputAt(1)->IsConstant()) {
        locations->SetInAt(1, Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(1, Location::Any());
      }
      if (!cond->IsEmittedAtUseSite()) {
        locations->SetOut(Location::RequiresRegister());
      }
      break;
    }
    default:
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::Any());
      if (!cond->IsEmittedAtUseSite()) {
        // We need a byte register.
        locations->SetOut(Location::RegisterLocation(ECX));
      }
      break;
  }
}

void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
  if (cond->IsEmittedAtUseSite()) {
    return;
  }

  LocationSummary* locations = cond->GetLocations();
  Location lhs = locations->InAt(0);
  Location rhs = locations->InAt(1);
  Register reg = locations->Out().AsRegister<Register>();
  NearLabel true_label, false_label;

  switch (cond->InputAt(0)->GetType()) {
    default: {
      // Integer case.

      // Clear output register: setb only sets the low byte.
      __ xorl(reg, reg);
      codegen_->GenerateIntCompare(lhs, rhs);
      __ setb(X86Condition(cond->GetCondition()), reg);
      return;
    }
    case DataType::Type::kInt64:
      GenerateLongComparesAndJumps(cond, &true_label, &false_label);
      break;
    case DataType::Type::kFloat32:
      GenerateFPCompare(lhs, rhs, cond, false);
      GenerateFPJumps(cond, &true_label, &false_label);
      break;
    case DataType::Type::kFloat64:
      GenerateFPCompare(lhs, rhs, cond, true);
      GenerateFPJumps(cond, &true_label, &false_label);
      break;
  }

  // Convert the jumps into the result.
  NearLabel done_label;

  // False case: result = 0.
  __ Bind(&false_label);
  __ xorl(reg, reg);
  __ jmp(&done_label);

  // True case: result = 1.
  __ Bind(&true_label);
  __ movl(reg, Immediate(1));
  __ Bind(&done_label);
}
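
// Worked example (a sketch): materializing `x < y` for kInt32 inputs takes
// the early-return integer path above and emits roughly
//   xorl ecx, ecx   // clear first: setl writes only the low byte (CL)
//   cmpl x, y
//   setl cl         // ECX = (x < y) ? 1 : 0
// while the long/FP cases fall through to the jump-based code that builds
// the 0/1 result out of true_label/false_label.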

void LocationsBuilderX86::VisitEqual(HEqual* comp) {
  HandleCondition(comp);
}

void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
  HandleCondition(comp);
}

void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
  HandleCondition(comp);
}

void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
  HandleCondition(comp);
}

void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
  HandleCondition(comp);
}

void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
  HandleCondition(comp);
}

void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
  HandleCondition(comp);
}

void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
  HandleCondition(comp);
}

void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
  HandleCondition(comp);
}

void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
  HandleCondition(comp);
}

void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
  HandleCondition(comp);
}

void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
  HandleCondition(comp);
}

void LocationsBuilderX86::VisitBelow(HBelow* comp) {
  HandleCondition(comp);
}

void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
  HandleCondition(comp);
}

void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
  HandleCondition(comp);
}

void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
  HandleCondition(comp);
}

void LocationsBuilderX86::VisitAbove(HAbove* comp) {
  HandleCondition(comp);
}

void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
  HandleCondition(comp);
}

void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
  HandleCondition(comp);
}

void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
  HandleCondition(comp);
}

void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
  locations->SetOut(Location::ConstantLocation(constant));
}

void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
  // Will be generated at use site.
}

void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
  locations->SetOut(Location::ConstantLocation(constant));
}

void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
  // Will be generated at use site.
}

void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
  locations->SetOut(Location::ConstantLocation(constant));
}

void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
  // Will be generated at use site.
}

void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
  locations->SetOut(Location::ConstantLocation(constant));
}

void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
  // Will be generated at use site.
}

void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
  locations->SetOut(Location::ConstantLocation(constant));
}

void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) {
  // Will be generated at use site.
}

void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) {
  constructor_fence->SetLocations(nullptr);
}

void InstructionCodeGeneratorX86::VisitConstructorFence(
    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
  codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
}

void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
  memory_barrier->SetLocations(nullptr);
}

void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
  codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
}

void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
  ret->SetLocations(nullptr);
}

void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
  codegen_->GenerateFrameExit();
}

void LocationsBuilderX86::VisitReturn(HReturn* ret) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
  switch (ret->InputAt(0)->GetType()) {
    case DataType::Type::kReference:
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      locations->SetInAt(0, Location::RegisterLocation(EAX));
      break;

    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RegisterPairLocation(EAX, EDX));
      break;

    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
      break;

    default:
      LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
  }
}

void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
  switch (ret->InputAt(0)->GetType()) {
    case DataType::Type::kReference:
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX);
      break;

    case DataType::Type::kInt64:
      DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairLow<Register>(), EAX);
      DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairHigh<Register>(), EDX);
      break;

    case DataType::Type::kFloat32:
      DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
      if (GetGraph()->IsCompilingOsr()) {
        // To simplify callers of an OSR method, we put the return value in both
        // floating point and core registers.
        __ movd(EAX, XMM0);
      }
      break;

    case DataType::Type::kFloat64:
      DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
      if (GetGraph()->IsCompilingOsr()) {
        // To simplify callers of an OSR method, we put the return value in both
        // floating point and core registers.
        __ movd(EAX, XMM0);
        // Use XMM1 as temporary register to not clobber XMM0.
        __ movaps(XMM1, XMM0);
        __ psrlq(XMM1, Immediate(32));
        __ movd(EDX, XMM1);
      }
      break;

    default:
      LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
  }
  codegen_->GenerateFrameExit();
}

void LocationsBuilderX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
  // The trampoline uses the same calling convention as the dex calling
  // convention, except instead of loading arg0/r0 with the target Method*,
  // arg0/r0 will contain the method_idx.
  HandleInvoke(invoke);
}

void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
  codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
}

void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
  // Explicit clinit checks triggered by static invokes must have been pruned by
  // art::PrepareForRegisterAllocation.
  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());

  IntrinsicLocationsBuilderX86 intrinsic(codegen_);
  if (intrinsic.TryDispatch(invoke)) {
    if (invoke->GetLocations()->CanCall() &&
        invoke->HasPcRelativeMethodLoadKind() &&
        invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).IsInvalid()) {
      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
    }
    return;
  }

  if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
    CriticalNativeCallingConventionVisitorX86 calling_convention_visitor(
        /*for_register_allocation=*/ true);
    CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
  } else {
    HandleInvoke(invoke);
  }

  // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
  if (invoke->HasPcRelativeMethodLoadKind()) {
    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
  }
}

static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
  if (invoke->GetLocations()->Intrinsified()) {
    IntrinsicCodeGeneratorX86 intrinsic(codegen);
    intrinsic.Dispatch(invoke);
    return true;
  }
  return false;
}

void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
  // Explicit clinit checks triggered by static invokes must have been pruned by
  // art::PrepareForRegisterAllocation.
  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());

  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
    return;
  }

  LocationSummary* locations = invoke->GetLocations();
  codegen_->GenerateStaticOrDirectCall(
      invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
}

void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
  IntrinsicLocationsBuilderX86 intrinsic(codegen_);
  if (intrinsic.TryDispatch(invoke)) {
    return;
  }

  HandleInvoke(invoke);

  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
    // Add one temporary for inline cache update.
    invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
  }
}

void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
  InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
  CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
}

void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
    return;
  }

  codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
  DCHECK(!codegen_->IsLeafMethod());
}

void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
  // This call to HandleInvoke allocates a temporary (core) register
  // which is also used to transfer the hidden argument from a core
  // register into the FP register XMM7.
  HandleInvoke(invoke);
  // Add the hidden argument.
  invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));

  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
    // Add one temporary for inline cache update.
    invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
  }

  // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
  if (IsPcRelativeMethodLoadKind(invoke->GetHiddenArgumentLoadKind())) {
    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
  }

  if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
    invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1,
                                    Location::RequiresRegister());
  }
}

void CodeGeneratorX86::MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass) {
  DCHECK_EQ(EAX, klass);
  // We know the destination of an intrinsic, so no need to record inline
  // caches (also the intrinsic location builder doesn't request an additional
  // temporary).
  if (!instruction->GetLocations()->Intrinsified() &&
      GetGraph()->IsCompilingBaseline() &&
      !Runtime::Current()->IsAotCompiler()) {
    DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
    ScopedProfilingInfoUse spiu(
        Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
    ProfilingInfo* info = spiu.GetProfilingInfo();
    if (info != nullptr) {
      InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
      uint32_t address = reinterpret_cast32<uint32_t>(cache);
      if (kIsDebugBuild) {
        uint32_t temp_index = instruction->GetLocations()->GetTempCount() - 1u;
        CHECK_EQ(EBP, instruction->GetLocations()->GetTemp(temp_index).AsRegister<Register>());
      }
      Register temp = EBP;
      NearLabel done;
      __ movl(temp, Immediate(address));
      // Fast path for a monomorphic cache.
      __ cmpl(klass, Address(temp, InlineCache::ClassesOffset().Int32Value()));
      __ j(kEqual, &done);
      GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(kQuickUpdateInlineCache).Int32Value());
      __ Bind(&done);
    }
  }
}

void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
  // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
  LocationSummary* locations = invoke->GetLocations();
  Register temp = locations->GetTemp(0).AsRegister<Register>();
  XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  Location receiver = locations->InAt(0);
  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();

  // Set the hidden argument. It is safe to do this here, as XMM7
  // won't be modified thereafter, before the `call` instruction.
  DCHECK_EQ(XMM7, hidden_reg);
  if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
    __ movd(hidden_reg, locations->InAt(invoke->GetNumberOfArguments() - 1).AsRegister<Register>());
  } else if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
    codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), locations->GetTemp(0), invoke);
    __ movd(hidden_reg, temp);
  }

  if (receiver.IsStackSlot()) {
    __ movl(temp, Address(ESP, receiver.GetStackIndex()));
    // /* HeapReference<Class> */ temp = temp->klass_
    __ movl(temp, Address(temp, class_offset));
  } else {
    // /* HeapReference<Class> */ temp = receiver->klass_
    __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
  }
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  // Instead of simply (possibly) unpoisoning `temp` here, we should
  // emit a read barrier for the previous class reference load.
  // However this is not required in practice, as this is an
  // intermediate/temporary reference and because the current
  // concurrent copying collector keeps the from-space memory
  // intact/accessible until the end of the marking phase (though
  // that may change in the future).
  __ MaybeUnpoisonHeapReference(temp);

  codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);

  // temp = temp->GetAddressOfIMT()
  __ movl(temp,
          Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
  // temp = temp->GetImtEntryAt(method_offset);
  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
      invoke->GetImtIndex(), kX86PointerSize));
  __ movl(temp, Address(temp, method_offset));
  if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
    // We pass the method from the IMT in case of a conflict. This will ensure
    // we go into the runtime to resolve the actual method.
    __ movd(hidden_reg, temp);
  }
  // call temp->GetEntryPoint();
  __ call(Address(temp,
                  ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));

  DCHECK(!codegen_->IsLeafMethod());
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
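
// Putting the interface dispatch together (an illustrative sketch with
// placeholder registers and offsets, not code from this file):
//   movl temp, [receiver + class_offset]   // load receiver->klass_
//   movl temp, [temp + imt_ptr_offset]     // load the class's IMT
//   movl temp, [temp + imt_entry_offset]   // select the IMT slot
//   call [temp + entry_point_offset]       // invoke through the ArtMethod
// with XMM7 carrying the hidden interface-method argument across the call;
// on an IMT conflict the runtime uses it to resolve the actual target.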
2670
VisitInvokePolymorphic(HInvokePolymorphic * invoke)2671 void LocationsBuilderX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2672 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2673 if (intrinsic.TryDispatch(invoke)) {
2674 return;
2675 }
2676 HandleInvoke(invoke);
2677 }
2678
VisitInvokePolymorphic(HInvokePolymorphic * invoke)2679 void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2680 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2681 return;
2682 }
2683 codegen_->GenerateInvokePolymorphicCall(invoke);
2684 }
2685
VisitInvokeCustom(HInvokeCustom * invoke)2686 void LocationsBuilderX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2687 HandleInvoke(invoke);
2688 }
2689
VisitInvokeCustom(HInvokeCustom * invoke)2690 void InstructionCodeGeneratorX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2691 codegen_->GenerateInvokeCustomCall(invoke);
2692 }
2693
VisitNeg(HNeg * neg)2694 void LocationsBuilderX86::VisitNeg(HNeg* neg) {
2695 LocationSummary* locations =
2696 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2697 switch (neg->GetResultType()) {
2698 case DataType::Type::kInt32:
2699 case DataType::Type::kInt64:
2700 locations->SetInAt(0, Location::RequiresRegister());
2701 locations->SetOut(Location::SameAsFirstInput());
2702 break;
2703
2704 case DataType::Type::kFloat32:
2705 locations->SetInAt(0, Location::RequiresFpuRegister());
2706 locations->SetOut(Location::SameAsFirstInput());
2707 locations->AddTemp(Location::RequiresRegister());
2708 locations->AddTemp(Location::RequiresFpuRegister());
2709 break;
2710
2711 case DataType::Type::kFloat64:
2712 locations->SetInAt(0, Location::RequiresFpuRegister());
2713 locations->SetOut(Location::SameAsFirstInput());
2714 locations->AddTemp(Location::RequiresFpuRegister());
2715 break;
2716
2717 default:
2718 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2719 }
2720 }
2721
2722 void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) {
2723 LocationSummary* locations = neg->GetLocations();
2724 Location out = locations->Out();
2725 Location in = locations->InAt(0);
2726 switch (neg->GetResultType()) {
2727 case DataType::Type::kInt32:
2728 DCHECK(in.IsRegister());
2729 DCHECK(in.Equals(out));
2730 __ negl(out.AsRegister<Register>());
2731 break;
2732
2733 case DataType::Type::kInt64:
2734 DCHECK(in.IsRegisterPair());
2735 DCHECK(in.Equals(out));
2736 __ negl(out.AsRegisterPairLow<Register>());
2737 // Negation is similar to subtraction from zero. A borrow is
2738 // produced when the low 32 bits are different from zero; to take
2739 // it into account, add 1 to the high 32 bits if the carry flag
2740 // (CF) is set to 1 after the first NEGL operation, before negating
2741 // the high 32 bits.
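// For example, negating 0x00000000'00000001: NEGL on the low word
// yields 0xFFFFFFFF and sets CF; ADCL then adds the carry to the
// high word (0 + 1 = 1), and the final NEGL turns it into
// 0xFFFFFFFF, producing 0xFFFFFFFF'FFFFFFFF, i.e. -1.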
2742 __ adcl(out.AsRegisterPairHigh<Register>(), Immediate(0));
2743 __ negl(out.AsRegisterPairHigh<Register>());
2744 break;
2745
2746 case DataType::Type::kFloat32: {
2747 DCHECK(in.Equals(out));
2748 Register constant = locations->GetTemp(0).AsRegister<Register>();
2749 XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2750 // Implement float negation with an exclusive or with value
2751 // 0x80000000 (mask for bit 31, representing the sign of a
2752 // single-precision floating-point number).
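// For example, 1.0f (0x3F800000) XOR 0x80000000 == 0xBF800000,
// which is -1.0f; every bit except the sign bit is preserved, so
// this also negates NaNs and infinities without raising exceptions.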
2753 __ movl(constant, Immediate(INT32_C(0x80000000)));
2754 __ movd(mask, constant);
2755 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2756 break;
2757 }
2758
2759 case DataType::Type::kFloat64: {
2760 DCHECK(in.Equals(out));
2761 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2762 // Implement double negation with an exclusive or with value
2763 // 0x8000000000000000 (mask for bit 63, representing the sign of
2764 // a double-precision floating-point number).
2765 __ LoadLongConstant(mask, INT64_C(0x8000000000000000));
2766 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2767 break;
2768 }
2769
2770 default:
2771 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2772 }
2773 }
2774
2775 void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) {
2776 LocationSummary* locations =
2777 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2778 DCHECK(DataType::IsFloatingPointType(neg->GetType()));
2779 locations->SetInAt(0, Location::RequiresFpuRegister());
2780 locations->SetInAt(1, Location::RequiresRegister());
2781 locations->SetOut(Location::SameAsFirstInput());
2782 locations->AddTemp(Location::RequiresFpuRegister());
2783 }
2784
2785 void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
2786 LocationSummary* locations = neg->GetLocations();
2787 Location out = locations->Out();
2788 DCHECK(locations->InAt(0).Equals(out));
2789
2790 Register constant_area = locations->InAt(1).AsRegister<Register>();
2791 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
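// Unlike the plain HNeg path above, the sign-bit mask is loaded
// from the method's constant area (PC-relative via the base method
// address) instead of being materialized through a core register.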
2792 if (neg->GetType() == DataType::Type::kFloat32) {
2793 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000),
2794 neg->GetBaseMethodAddress(),
2795 constant_area));
2796 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2797 } else {
2798 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
2799 neg->GetBaseMethodAddress(),
2800 constant_area));
2801 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2802 }
2803 }
2804
2805 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
2806 DataType::Type result_type = conversion->GetResultType();
2807 DataType::Type input_type = conversion->GetInputType();
2808 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2809 << input_type << " -> " << result_type;
2810
2811 // The float-to-long and double-to-long type conversions rely on a
2812 // call to the runtime.
2813 LocationSummary::CallKind call_kind =
2814 ((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
2815 && result_type == DataType::Type::kInt64)
2816 ? LocationSummary::kCallOnMainOnly
2817 : LocationSummary::kNoCall;
2818 LocationSummary* locations =
2819 new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
2820
2821 switch (result_type) {
2822 case DataType::Type::kUint8:
2823 case DataType::Type::kInt8:
2824 switch (input_type) {
2825 case DataType::Type::kUint8:
2826 case DataType::Type::kInt8:
2827 case DataType::Type::kUint16:
2828 case DataType::Type::kInt16:
2829 case DataType::Type::kInt32:
2830 locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0)));
2831 // Make the output overlap to please the register allocator. This greatly simplifies
2832 // the validation of the linear scan implementation.
2833 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2834 break;
2835 case DataType::Type::kInt64: {
2836 HInstruction* input = conversion->InputAt(0);
2837 Location input_location = input->IsConstant()
2838 ? Location::ConstantLocation(input->AsConstant())
2839 : Location::RegisterPairLocation(EAX, EDX);
2840 locations->SetInAt(0, input_location);
2841 // Make the output overlap to please the register allocator. This greatly simplifies
2842 // the validation of the linear scan implementation.
2843 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2844 break;
2845 }
2846
2847 default:
2848 LOG(FATAL) << "Unexpected type conversion from " << input_type
2849 << " to " << result_type;
2850 }
2851 break;
2852
2853 case DataType::Type::kUint16:
2854 case DataType::Type::kInt16:
2855 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
2856 locations->SetInAt(0, Location::Any());
2857 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2858 break;
2859
2860 case DataType::Type::kInt32:
2861 switch (input_type) {
2862 case DataType::Type::kInt64:
2863 locations->SetInAt(0, Location::Any());
2864 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2865 break;
2866
2867 case DataType::Type::kFloat32:
2868 locations->SetInAt(0, Location::RequiresFpuRegister());
2869 locations->SetOut(Location::RequiresRegister());
2870 locations->AddTemp(Location::RequiresFpuRegister());
2871 break;
2872
2873 case DataType::Type::kFloat64:
2874 locations->SetInAt(0, Location::RequiresFpuRegister());
2875 locations->SetOut(Location::RequiresRegister());
2876 locations->AddTemp(Location::RequiresFpuRegister());
2877 break;
2878
2879 default:
2880 LOG(FATAL) << "Unexpected type conversion from " << input_type
2881 << " to " << result_type;
2882 }
2883 break;
2884
2885 case DataType::Type::kInt64:
2886 switch (input_type) {
2887 case DataType::Type::kBool:
2888 case DataType::Type::kUint8:
2889 case DataType::Type::kInt8:
2890 case DataType::Type::kUint16:
2891 case DataType::Type::kInt16:
2892 case DataType::Type::kInt32:
2893 locations->SetInAt(0, Location::RegisterLocation(EAX));
2894 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2895 break;
2896
2897 case DataType::Type::kFloat32:
2898 case DataType::Type::kFloat64: {
2899 InvokeRuntimeCallingConvention calling_convention;
2900 XmmRegister parameter = calling_convention.GetFpuRegisterAt(0);
2901 locations->SetInAt(0, Location::FpuRegisterLocation(parameter));
2902
2903 // The runtime helper puts the result in EAX, EDX.
2904 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2905 }
2906 break;
2907
2908 default:
2909 LOG(FATAL) << "Unexpected type conversion from " << input_type
2910 << " to " << result_type;
2911 }
2912 break;
2913
2914 case DataType::Type::kFloat32:
2915 switch (input_type) {
2916 case DataType::Type::kBool:
2917 case DataType::Type::kUint8:
2918 case DataType::Type::kInt8:
2919 case DataType::Type::kUint16:
2920 case DataType::Type::kInt16:
2921 case DataType::Type::kInt32:
2922 locations->SetInAt(0, Location::RequiresRegister());
2923 locations->SetOut(Location::RequiresFpuRegister());
2924 break;
2925
2926 case DataType::Type::kInt64:
2927 locations->SetInAt(0, Location::Any());
2928 locations->SetOut(Location::Any());
2929 break;
2930
2931 case DataType::Type::kFloat64:
2932 locations->SetInAt(0, Location::RequiresFpuRegister());
2933 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2934 break;
2935
2936 default:
2937 LOG(FATAL) << "Unexpected type conversion from " << input_type
2938 << " to " << result_type;
2939 }
2940 break;
2941
2942 case DataType::Type::kFloat64:
2943 switch (input_type) {
2944 case DataType::Type::kBool:
2945 case DataType::Type::kUint8:
2946 case DataType::Type::kInt8:
2947 case DataType::Type::kUint16:
2948 case DataType::Type::kInt16:
2949 case DataType::Type::kInt32:
2950 locations->SetInAt(0, Location::RequiresRegister());
2951 locations->SetOut(Location::RequiresFpuRegister());
2952 break;
2953
2954 case DataType::Type::kInt64:
2955 locations->SetInAt(0, Location::Any());
2956 locations->SetOut(Location::Any());
2957 break;
2958
2959 case DataType::Type::kFloat32:
2960 locations->SetInAt(0, Location::RequiresFpuRegister());
2961 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2962 break;
2963
2964 default:
2965 LOG(FATAL) << "Unexpected type conversion from " << input_type
2966 << " to " << result_type;
2967 }
2968 break;
2969
2970 default:
2971 LOG(FATAL) << "Unexpected type conversion from " << input_type
2972 << " to " << result_type;
2973 }
2974 }
2975
2976 void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversion) {
2977 LocationSummary* locations = conversion->GetLocations();
2978 Location out = locations->Out();
2979 Location in = locations->InAt(0);
2980 DataType::Type result_type = conversion->GetResultType();
2981 DataType::Type input_type = conversion->GetInputType();
2982 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2983 << input_type << " -> " << result_type;
2984 switch (result_type) {
2985 case DataType::Type::kUint8:
2986 switch (input_type) {
2987 case DataType::Type::kInt8:
2988 case DataType::Type::kUint16:
2989 case DataType::Type::kInt16:
2990 case DataType::Type::kInt32:
2991 if (in.IsRegister()) {
2992 __ movzxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
2993 } else {
2994 DCHECK(in.GetConstant()->IsIntConstant());
2995 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2996 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
2997 }
2998 break;
2999 case DataType::Type::kInt64:
3000 if (in.IsRegisterPair()) {
3001 __ movzxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
3002 } else {
3003 DCHECK(in.GetConstant()->IsLongConstant());
3004 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3005 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
3006 }
3007 break;
3008
3009 default:
3010 LOG(FATAL) << "Unexpected type conversion from " << input_type
3011 << " to " << result_type;
3012 }
3013 break;
3014
3015 case DataType::Type::kInt8:
3016 switch (input_type) {
3017 case DataType::Type::kUint8:
3018 case DataType::Type::kUint16:
3019 case DataType::Type::kInt16:
3020 case DataType::Type::kInt32:
3021 if (in.IsRegister()) {
3022 __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
3023 } else {
3024 DCHECK(in.GetConstant()->IsIntConstant());
3025 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3026 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
3027 }
3028 break;
3029 case DataType::Type::kInt64:
3030 if (in.IsRegisterPair()) {
3031 __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
3032 } else {
3033 DCHECK(in.GetConstant()->IsLongConstant());
3034 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3035 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
3036 }
3037 break;
3038
3039 default:
3040 LOG(FATAL) << "Unexpected type conversion from " << input_type
3041 << " to " << result_type;
3042 }
3043 break;
3044
3045 case DataType::Type::kUint16:
3046 switch (input_type) {
3047 case DataType::Type::kInt8:
3048 case DataType::Type::kInt16:
3049 case DataType::Type::kInt32:
3050 if (in.IsRegister()) {
3051 __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>());
3052 } else if (in.IsStackSlot()) {
3053 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3054 } else {
3055 DCHECK(in.GetConstant()->IsIntConstant());
3056 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3057 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
3058 }
3059 break;
3060 case DataType::Type::kInt64:
3061 if (in.IsRegisterPair()) {
3062 __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3063 } else if (in.IsDoubleStackSlot()) {
3064 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3065 } else {
3066 DCHECK(in.GetConstant()->IsLongConstant());
3067 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3068 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
3069 }
3070 break;
3071
3072 default:
3073 LOG(FATAL) << "Unexpected type conversion from " << input_type
3074 << " to " << result_type;
3075 }
3076 break;
3077
3078 case DataType::Type::kInt16:
3079 switch (input_type) {
3080 case DataType::Type::kUint16:
3081 case DataType::Type::kInt32:
3082 if (in.IsRegister()) {
3083 __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>());
3084 } else if (in.IsStackSlot()) {
3085 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3086 } else {
3087 DCHECK(in.GetConstant()->IsIntConstant());
3088 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3089 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
3090 }
3091 break;
3092 case DataType::Type::kInt64:
3093 if (in.IsRegisterPair()) {
3094 __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3095 } else if (in.IsDoubleStackSlot()) {
3096 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3097 } else {
3098 DCHECK(in.GetConstant()->IsLongConstant());
3099 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3100 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
3101 }
3102 break;
3103
3104 default:
3105 LOG(FATAL) << "Unexpected type conversion from " << input_type
3106 << " to " << result_type;
3107 }
3108 break;
3109
3110 case DataType::Type::kInt32:
3111 switch (input_type) {
3112 case DataType::Type::kInt64:
3113 if (in.IsRegisterPair()) {
3114 __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3115 } else if (in.IsDoubleStackSlot()) {
3116 __ movl(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3117 } else {
3118 DCHECK(in.IsConstant());
3119 DCHECK(in.GetConstant()->IsLongConstant());
3120 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3121 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int32_t>(value)));
3122 }
3123 break;
3124
3125 case DataType::Type::kFloat32: {
3126 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3127 Register output = out.AsRegister<Register>();
3128 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3129 NearLabel done, nan;
3130
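// Implements the Java float-to-int semantics: inputs that compare
// greater than or equal to the float-rounded Integer.MAX_VALUE
// saturate to Integer.MAX_VALUE (kAboveEqual), NaN yields 0
// (kUnordered), and everything else is truncated toward zero by
// CVTTSS2SI. Inputs below Integer.MIN_VALUE make CVTTSS2SI return
// 0x80000000, which is already the saturated Integer.MIN_VALUE.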
3131 __ movl(output, Immediate(kPrimIntMax));
3132 // temp = int-to-float(output)
3133 __ cvtsi2ss(temp, output);
3134 // if input >= temp goto done
3135 __ comiss(input, temp);
3136 __ j(kAboveEqual, &done);
3137 // if input == NaN goto nan
3138 __ j(kUnordered, &nan);
3139 // output = float-to-int-truncate(input)
3140 __ cvttss2si(output, input);
3141 __ jmp(&done);
3142 __ Bind(&nan);
3143 // output = 0
3144 __ xorl(output, output);
3145 __ Bind(&done);
3146 break;
3147 }
3148
3149 case DataType::Type::kFloat64: {
3150 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3151 Register output = out.AsRegister<Register>();
3152 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3153 NearLabel done, nan;
3154
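// Same saturation and NaN handling as the kFloat32 case above,
// using the double-precision COMISD/CVTTSD2SI instructions.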
3155 __ movl(output, Immediate(kPrimIntMax));
3156 // temp = int-to-double(output)
3157 __ cvtsi2sd(temp, output);
3158 // if input >= temp goto done
3159 __ comisd(input, temp);
3160 __ j(kAboveEqual, &done);
3161 // if input == NaN goto nan
3162 __ j(kUnordered, &nan);
3163 // output = double-to-int-truncate(input)
3164 __ cvttsd2si(output, input);
3165 __ jmp(&done);
3166 __ Bind(&nan);
3167 // output = 0
3168 __ xorl(output, output);
3169 __ Bind(&done);
3170 break;
3171 }
3172
3173 default:
3174 LOG(FATAL) << "Unexpected type conversion from " << input_type
3175 << " to " << result_type;
3176 }
3177 break;
3178
3179 case DataType::Type::kInt64:
3180 switch (input_type) {
3181 case DataType::Type::kBool:
3182 case DataType::Type::kUint8:
3183 case DataType::Type::kInt8:
3184 case DataType::Type::kUint16:
3185 case DataType::Type::kInt16:
3186 case DataType::Type::kInt32:
3187 DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX);
3188 DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX);
3189 DCHECK_EQ(in.AsRegister<Register>(), EAX);
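// CDQ sign-extends EAX into EDX, yielding the 64-bit result in the
// EDX:EAX pair that the output location expects.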
3190 __ cdq();
3191 break;
3192
3193 case DataType::Type::kFloat32:
3194 codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
3195 CheckEntrypointTypes<kQuickF2l, int64_t, float>();
3196 break;
3197
3198 case DataType::Type::kFloat64:
3199 codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
3200 CheckEntrypointTypes<kQuickD2l, int64_t, double>();
3201 break;
3202
3203 default:
3204 LOG(FATAL) << "Unexpected type conversion from " << input_type
3205 << " to " << result_type;
3206 }
3207 break;
3208
3209 case DataType::Type::kFloat32:
3210 switch (input_type) {
3211 case DataType::Type::kBool:
3212 case DataType::Type::kUint8:
3213 case DataType::Type::kInt8:
3214 case DataType::Type::kUint16:
3215 case DataType::Type::kInt16:
3216 case DataType::Type::kInt32:
3217 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3218 break;
3219
3220 case DataType::Type::kInt64: {
3221 size_t adjustment = 0;
3222
3223 // Create stack space for the call to
3224 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below.
3225 // TODO: enhance register allocator to ask for stack temporaries.
3226 if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
3227 adjustment = DataType::Size(DataType::Type::kInt64);
3228 codegen_->IncreaseFrame(adjustment);
3229 }
3230
3231 // Load the value to the FP stack, using temporaries if needed.
3232 PushOntoFPStack(in, 0, adjustment, false, true);
3233
3234 if (out.IsStackSlot()) {
3235 __ fstps(Address(ESP, out.GetStackIndex() + adjustment));
3236 } else {
3237 __ fstps(Address(ESP, 0));
3238 Location stack_temp = Location::StackSlot(0);
3239 codegen_->Move32(out, stack_temp);
3240 }
3241
3242 // Remove the temporary stack space we allocated.
3243 if (adjustment != 0) {
3244 codegen_->DecreaseFrame(adjustment);
3245 }
3246 break;
3247 }
3248
3249 case DataType::Type::kFloat64:
3250 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3251 break;
3252
3253 default:
3254 LOG(FATAL) << "Unexpected type conversion from " << input_type
3255 << " to " << result_type;
3256 }
3257 break;
3258
3259 case DataType::Type::kFloat64:
3260 switch (input_type) {
3261 case DataType::Type::kBool:
3262 case DataType::Type::kUint8:
3263 case DataType::Type::kInt8:
3264 case DataType::Type::kUint16:
3265 case DataType::Type::kInt16:
3266 case DataType::Type::kInt32:
3267 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3268 break;
3269
3270 case DataType::Type::kInt64: {
3271 size_t adjustment = 0;
3272
3273 // Create stack space for the call to
3274 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below.
3275 // TODO: enhance register allocator to ask for stack temporaries.
3276 if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
3277 adjustment = DataType::Size(DataType::Type::kInt64);
3278 codegen_->IncreaseFrame(adjustment);
3279 }
3280
3281 // Load the value to the FP stack, using temporaries if needed.
3282 PushOntoFPStack(in, 0, adjustment, false, true);
3283
3284 if (out.IsDoubleStackSlot()) {
3285 __ fstpl(Address(ESP, out.GetStackIndex() + adjustment));
3286 } else {
3287 __ fstpl(Address(ESP, 0));
3288 Location stack_temp = Location::DoubleStackSlot(0);
3289 codegen_->Move64(out, stack_temp);
3290 }
3291
3292 // Remove the temporary stack space we allocated.
3293 if (adjustment != 0) {
3294 codegen_->DecreaseFrame(adjustment);
3295 }
3296 break;
3297 }
3298
3299 case DataType::Type::kFloat32:
3300 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3301 break;
3302
3303 default:
3304 LOG(FATAL) << "Unexpected type conversion from " << input_type
3305 << " to " << result_type;
3306 }
3307 break;
3308
3309 default:
3310 LOG(FATAL) << "Unexpected type conversion from " << input_type
3311 << " to " << result_type;
3312 }
3313 }
3314
3315 void LocationsBuilderX86::VisitAdd(HAdd* add) {
3316 LocationSummary* locations =
3317 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3318 switch (add->GetResultType()) {
3319 case DataType::Type::kInt32: {
3320 locations->SetInAt(0, Location::RequiresRegister());
3321 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3322 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3323 break;
3324 }
3325
3326 case DataType::Type::kInt64: {
3327 locations->SetInAt(0, Location::RequiresRegister());
3328 locations->SetInAt(1, Location::Any());
3329 locations->SetOut(Location::SameAsFirstInput());
3330 break;
3331 }
3332
3333 case DataType::Type::kFloat32:
3334 case DataType::Type::kFloat64: {
3335 locations->SetInAt(0, Location::RequiresFpuRegister());
3336 if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3337 DCHECK(add->InputAt(1)->IsEmittedAtUseSite());
3338 } else if (add->InputAt(1)->IsConstant()) {
3339 locations->SetInAt(1, Location::RequiresFpuRegister());
3340 } else {
3341 locations->SetInAt(1, Location::Any());
3342 }
3343 locations->SetOut(Location::SameAsFirstInput());
3344 break;
3345 }
3346
3347 default:
3348 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3349 UNREACHABLE();
3350 }
3351 }
3352
3353 void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
3354 LocationSummary* locations = add->GetLocations();
3355 Location first = locations->InAt(0);
3356 Location second = locations->InAt(1);
3357 Location out = locations->Out();
3358
3359 switch (add->GetResultType()) {
3360 case DataType::Type::kInt32: {
3361 if (second.IsRegister()) {
3362 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3363 __ addl(out.AsRegister<Register>(), second.AsRegister<Register>());
3364 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3365 __ addl(out.AsRegister<Register>(), first.AsRegister<Register>());
3366 } else {
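// Neither input aliases the output: use LEAL as a non-destructive
// three-operand add (out = first + second).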
3367 __ leal(out.AsRegister<Register>(), Address(
3368 first.AsRegister<Register>(), second.AsRegister<Register>(), TIMES_1, 0));
3369 }
3370 } else if (second.IsConstant()) {
3371 int32_t value = second.GetConstant()->AsIntConstant()->GetValue();
3372 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3373 __ addl(out.AsRegister<Register>(), Immediate(value));
3374 } else {
3375 __ leal(out.AsRegister<Register>(), Address(first.AsRegister<Register>(), value));
3376 }
3377 } else {
3378 DCHECK(first.Equals(locations->Out()));
3379 __ addl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3380 }
3381 break;
3382 }
3383
3384 case DataType::Type::kInt64: {
3385 if (second.IsRegisterPair()) {
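// 64-bit add on 32-bit x86: ADDL the low words, then ADCL folds
// the carry into the high words.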
3386 __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3387 __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3388 } else if (second.IsDoubleStackSlot()) {
3389 __ addl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3390 __ adcl(first.AsRegisterPairHigh<Register>(),
3391 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3392 } else {
3393 DCHECK(second.IsConstant()) << second;
3394 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3395 __ addl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3396 __ adcl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3397 }
3398 break;
3399 }
3400
3401 case DataType::Type::kFloat32: {
3402 if (second.IsFpuRegister()) {
3403 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3404 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3405 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3406 DCHECK(const_area->IsEmittedAtUseSite());
3407 __ addss(first.AsFpuRegister<XmmRegister>(),
3408 codegen_->LiteralFloatAddress(
3409 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3410 const_area->GetBaseMethodAddress(),
3411 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3412 } else {
3413 DCHECK(second.IsStackSlot());
3414 __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3415 }
3416 break;
3417 }
3418
3419 case DataType::Type::kFloat64: {
3420 if (second.IsFpuRegister()) {
3421 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3422 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3423 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3424 DCHECK(const_area->IsEmittedAtUseSite());
3425 __ addsd(first.AsFpuRegister<XmmRegister>(),
3426 codegen_->LiteralDoubleAddress(
3427 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3428 const_area->GetBaseMethodAddress(),
3429 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3430 } else {
3431 DCHECK(second.IsDoubleStackSlot());
3432 __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3433 }
3434 break;
3435 }
3436
3437 default:
3438 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3439 }
3440 }
3441
3442 void LocationsBuilderX86::VisitSub(HSub* sub) {
3443 LocationSummary* locations =
3444 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3445 switch (sub->GetResultType()) {
3446 case DataType::Type::kInt32:
3447 case DataType::Type::kInt64: {
3448 locations->SetInAt(0, Location::RequiresRegister());
3449 locations->SetInAt(1, Location::Any());
3450 locations->SetOut(Location::SameAsFirstInput());
3451 break;
3452 }
3453 case DataType::Type::kFloat32:
3454 case DataType::Type::kFloat64: {
3455 locations->SetInAt(0, Location::RequiresFpuRegister());
3456 if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3457 DCHECK(sub->InputAt(1)->IsEmittedAtUseSite());
3458 } else if (sub->InputAt(1)->IsConstant()) {
3459 locations->SetInAt(1, Location::RequiresFpuRegister());
3460 } else {
3461 locations->SetInAt(1, Location::Any());
3462 }
3463 locations->SetOut(Location::SameAsFirstInput());
3464 break;
3465 }
3466
3467 default:
3468 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3469 }
3470 }
3471
3472 void InstructionCodeGeneratorX86::VisitSub(HSub* sub) {
3473 LocationSummary* locations = sub->GetLocations();
3474 Location first = locations->InAt(0);
3475 Location second = locations->InAt(1);
3476 DCHECK(first.Equals(locations->Out()));
3477 switch (sub->GetResultType()) {
3478 case DataType::Type::kInt32: {
3479 if (second.IsRegister()) {
3480 __ subl(first.AsRegister<Register>(), second.AsRegister<Register>());
3481 } else if (second.IsConstant()) {
3482 __ subl(first.AsRegister<Register>(),
3483 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3484 } else {
3485 __ subl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3486 }
3487 break;
3488 }
3489
3490 case DataType::Type::kInt64: {
3491 if (second.IsRegisterPair()) {
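// 64-bit subtract: SUBL the low words, then SBBL applies the
// borrow to the high words.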
3492 __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3493 __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3494 } else if (second.IsDoubleStackSlot()) {
3495 __ subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3496 __ sbbl(first.AsRegisterPairHigh<Register>(),
3497 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3498 } else {
3499 DCHECK(second.IsConstant()) << second;
3500 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3501 __ subl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3502 __ sbbl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3503 }
3504 break;
3505 }
3506
3507 case DataType::Type::kFloat32: {
3508 if (second.IsFpuRegister()) {
3509 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3510 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3511 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3512 DCHECK(const_area->IsEmittedAtUseSite());
3513 __ subss(first.AsFpuRegister<XmmRegister>(),
3514 codegen_->LiteralFloatAddress(
3515 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3516 const_area->GetBaseMethodAddress(),
3517 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3518 } else {
3519 DCHECK(second.IsStackSlot());
3520 __ subss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3521 }
3522 break;
3523 }
3524
3525 case DataType::Type::kFloat64: {
3526 if (second.IsFpuRegister()) {
3527 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3528 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3529 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3530 DCHECK(const_area->IsEmittedAtUseSite());
3531 __ subsd(first.AsFpuRegister<XmmRegister>(),
3532 codegen_->LiteralDoubleAddress(
3533 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3534 const_area->GetBaseMethodAddress(),
3535 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3536 } else {
3537 DCHECK(second.IsDoubleStackSlot());
3538 __ subsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3539 }
3540 break;
3541 }
3542
3543 default:
3544 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3545 }
3546 }
3547
3548 void LocationsBuilderX86::VisitMul(HMul* mul) {
3549 LocationSummary* locations =
3550 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3551 switch (mul->GetResultType()) {
3552 case DataType::Type::kInt32:
3553 locations->SetInAt(0, Location::RequiresRegister());
3554 locations->SetInAt(1, Location::Any());
3555 if (mul->InputAt(1)->IsIntConstant()) {
3556 // Can use 3 operand multiply.
3557 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3558 } else {
3559 locations->SetOut(Location::SameAsFirstInput());
3560 }
3561 break;
3562 case DataType::Type::kInt64: {
3563 locations->SetInAt(0, Location::RequiresRegister());
3564 locations->SetInAt(1, Location::Any());
3565 locations->SetOut(Location::SameAsFirstInput());
3566 // Needed for the 32-bit imul sequence that produces a 64-bit result.
3567 locations->AddTemp(Location::RegisterLocation(EAX));
3568 locations->AddTemp(Location::RegisterLocation(EDX));
3569 break;
3570 }
3571 case DataType::Type::kFloat32:
3572 case DataType::Type::kFloat64: {
3573 locations->SetInAt(0, Location::RequiresFpuRegister());
3574 if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3575 DCHECK(mul->InputAt(1)->IsEmittedAtUseSite());
3576 } else if (mul->InputAt(1)->IsConstant()) {
3577 locations->SetInAt(1, Location::RequiresFpuRegister());
3578 } else {
3579 locations->SetInAt(1, Location::Any());
3580 }
3581 locations->SetOut(Location::SameAsFirstInput());
3582 break;
3583 }
3584
3585 default:
3586 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3587 }
3588 }
3589
3590 void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
3591 LocationSummary* locations = mul->GetLocations();
3592 Location first = locations->InAt(0);
3593 Location second = locations->InAt(1);
3594 Location out = locations->Out();
3595
3596 switch (mul->GetResultType()) {
3597 case DataType::Type::kInt32:
3598 // The constant may have ended up in a register, so test explicitly to avoid
3599 // problems where the output may not be the same as the first operand.
3600 if (mul->InputAt(1)->IsIntConstant()) {
3601 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3602 __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm);
3603 } else if (second.IsRegister()) {
3604 DCHECK(first.Equals(out));
3605 __ imull(first.AsRegister<Register>(), second.AsRegister<Register>());
3606 } else {
3607 DCHECK(second.IsStackSlot());
3608 DCHECK(first.Equals(out));
3609 __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3610 }
3611 break;
3612
3613 case DataType::Type::kInt64: {
3614 Register in1_hi = first.AsRegisterPairHigh<Register>();
3615 Register in1_lo = first.AsRegisterPairLow<Register>();
3616 Register eax = locations->GetTemp(0).AsRegister<Register>();
3617 Register edx = locations->GetTemp(1).AsRegister<Register>();
3618
3619 DCHECK_EQ(EAX, eax);
3620 DCHECK_EQ(EDX, edx);
3621
3622 // input: in1 - 64 bits, in2 - 64 bits.
3623 // output: in1
3624 // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
3625 // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
3626 // parts: in1.lo = (in1.lo * in2.lo)[31:0]
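// The in1.hi * in2.hi term is omitted: it only contributes to bits
// [127:64] of the full 128-bit product, which a 64-bit multiply
// discards.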
3627 if (second.IsConstant()) {
3628 DCHECK(second.GetConstant()->IsLongConstant());
3629
3630 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3631 int32_t low_value = Low32Bits(value);
3632 int32_t high_value = High32Bits(value);
3633 Immediate low(low_value);
3634 Immediate high(high_value);
3635
3636 __ movl(eax, high);
3637 // eax <- in1.lo * in2.hi
3638 __ imull(eax, in1_lo);
3639 // in1.hi <- in1.hi * in2.lo
3640 __ imull(in1_hi, low);
3641 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3642 __ addl(in1_hi, eax);
3643 // move in2_lo to eax to prepare for the widening multiply
3644 __ movl(eax, low);
3645 // edx:eax <- in1.lo * in2.lo
3646 __ mull(in1_lo);
3647 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3648 __ addl(in1_hi, edx);
3649 // in1.lo <- (in1.lo * in2.lo)[31:0];
3650 __ movl(in1_lo, eax);
3651 } else if (second.IsRegisterPair()) {
3652 Register in2_hi = second.AsRegisterPairHigh<Register>();
3653 Register in2_lo = second.AsRegisterPairLow<Register>();
3654
3655 __ movl(eax, in2_hi);
3656 // eax <- in1.lo * in2.hi
3657 __ imull(eax, in1_lo);
3658 // in1.hi <- in1.hi * in2.lo
3659 __ imull(in1_hi, in2_lo);
3660 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3661 __ addl(in1_hi, eax);
3662 // move in1_lo to eax to prepare for the widening multiply
3663 __ movl(eax, in1_lo);
3664 // edx:eax <- in1.lo * in2.lo
3665 __ mull(in2_lo);
3666 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3667 __ addl(in1_hi, edx);
3668 // in1.lo <- (in1.lo * in2.lo)[31:0];
3669 __ movl(in1_lo, eax);
3670 } else {
3671 DCHECK(second.IsDoubleStackSlot()) << second;
3672 Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize));
3673 Address in2_lo(ESP, second.GetStackIndex());
3674
3675 __ movl(eax, in2_hi);
3676 // eax <- in1.lo * in2.hi
3677 __ imull(eax, in1_lo);
3678 // in1.hi <- in1.hi * in2.lo
3679 __ imull(in1_hi, in2_lo);
3680 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3681 __ addl(in1_hi, eax);
3682 // move in1_lo to eax to prepare for the widening multiply
3683 __ movl(eax, in1_lo);
3684 // edx:eax <- in1.lo * in2.lo
3685 __ mull(in2_lo);
3686 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3687 __ addl(in1_hi, edx);
3688 // in1.lo <- (in1.lo * in2.lo)[31:0];
3689 __ movl(in1_lo, eax);
3690 }
3691
3692 break;
3693 }
3694
3695 case DataType::Type::kFloat32: {
3696 DCHECK(first.Equals(locations->Out()));
3697 if (second.IsFpuRegister()) {
3698 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3699 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3700 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3701 DCHECK(const_area->IsEmittedAtUseSite());
3702 __ mulss(first.AsFpuRegister<XmmRegister>(),
3703 codegen_->LiteralFloatAddress(
3704 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3705 const_area->GetBaseMethodAddress(),
3706 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3707 } else {
3708 DCHECK(second.IsStackSlot());
3709 __ mulss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3710 }
3711 break;
3712 }
3713
3714 case DataType::Type::kFloat64: {
3715 DCHECK(first.Equals(locations->Out()));
3716 if (second.IsFpuRegister()) {
3717 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3718 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3719 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3720 DCHECK(const_area->IsEmittedAtUseSite());
3721 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3722 codegen_->LiteralDoubleAddress(
3723 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3724 const_area->GetBaseMethodAddress(),
3725 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3726 } else {
3727 DCHECK(second.IsDoubleStackSlot());
3728 __ mulsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3729 }
3730 break;
3731 }
3732
3733 default:
3734 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3735 }
3736 }
3737
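// Loads `source` onto the x87 FP stack. Stack-slot sources are
// loaded directly (offset by `stack_adjustment` for any extra frame
// space the caller reserved); other sources are first spilled to a
// temporary stack slot at `temp_offset`. `is_fp` selects FLD vs.
// the integer-converting FILD, and `is_wide` selects 64-bit forms.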
3738 void InstructionCodeGeneratorX86::PushOntoFPStack(Location source,
3739 uint32_t temp_offset,
3740 uint32_t stack_adjustment,
3741 bool is_fp,
3742 bool is_wide) {
3743 if (source.IsStackSlot()) {
3744 DCHECK(!is_wide);
3745 if (is_fp) {
3746 __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3747 } else {
3748 __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3749 }
3750 } else if (source.IsDoubleStackSlot()) {
3751 DCHECK(is_wide);
3752 if (is_fp) {
3753 __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3754 } else {
3755 __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3756 }
3757 } else {
3758 // Write the value to the temporary location on the stack and load to FP stack.
3759 if (!is_wide) {
3760 Location stack_temp = Location::StackSlot(temp_offset);
3761 codegen_->Move32(stack_temp, source);
3762 if (is_fp) {
3763 __ flds(Address(ESP, temp_offset));
3764 } else {
3765 __ filds(Address(ESP, temp_offset));
3766 }
3767 } else {
3768 Location stack_temp = Location::DoubleStackSlot(temp_offset);
3769 codegen_->Move64(stack_temp, source);
3770 if (is_fp) {
3771 __ fldl(Address(ESP, temp_offset));
3772 } else {
3773 __ fildl(Address(ESP, temp_offset));
3774 }
3775 }
3776 }
3777 }
3778
3779 void InstructionCodeGeneratorX86::GenerateRemFP(HRem* rem) {
3780 DataType::Type type = rem->GetResultType();
3781 bool is_float = type == DataType::Type::kFloat32;
3782 size_t elem_size = DataType::Size(type);
3783 LocationSummary* locations = rem->GetLocations();
3784 Location first = locations->InAt(0);
3785 Location second = locations->InAt(1);
3786 Location out = locations->Out();
3787
3788 // Create stack space for 2 elements.
3789 // TODO: enhance register allocator to ask for stack temporaries.
3790 codegen_->IncreaseFrame(2 * elem_size);
3791
3792 // Load the values to the FP stack in reverse order, using temporaries if needed.
3793 const bool is_wide = !is_float;
3794 PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp= */ true, is_wide);
3795 PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp= */ true, is_wide);
3796
3797 // Loop doing FPREM until we stabilize.
3798 NearLabel retry;
3799 __ Bind(&retry);
3800 __ fprem();
3801
3802 // Move FP status to AX.
3803 __ fstsw();
3804
3805 // And see if the argument reduction is complete. This is signaled by the
3806 // C2 FPU flag bit set to 0.
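// FPREM only computes a partial remainder: each iteration reduces
// the exponent difference by at most 63, so we loop until C2 reads
// 0 and the reduction is complete.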
3807 __ andl(EAX, Immediate(kC2ConditionMask));
3808 __ j(kNotEqual, &retry);
3809
3810 // We have settled on the final value. Retrieve it into an XMM register.
3811 // Store FP top of stack to real stack.
3812 if (is_float) {
3813 __ fsts(Address(ESP, 0));
3814 } else {
3815 __ fstl(Address(ESP, 0));
3816 }
3817
3818 // Pop the 2 items from the FP stack.
3819 __ fucompp();
3820
3821 // Load the value from the stack into an XMM register.
3822 DCHECK(out.IsFpuRegister()) << out;
3823 if (is_float) {
3824 __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3825 } else {
3826 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3827 }
3828
3829 // And remove the temporary stack space we allocated.
3830 codegen_->DecreaseFrame(2 * elem_size);
3831 }
3832
3833
3834 void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3835 DCHECK(instruction->IsDiv() || instruction->IsRem());
3836
3837 LocationSummary* locations = instruction->GetLocations();
3838 DCHECK(locations->InAt(1).IsConstant());
3839 DCHECK(locations->InAt(1).GetConstant()->IsIntConstant());
3840
3841 Register out_register = locations->Out().AsRegister<Register>();
3842 Register input_register = locations->InAt(0).AsRegister<Register>();
3843 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3844
3845 DCHECK(imm == 1 || imm == -1);
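// x / 1 == x and x / -1 == -x (even for INT_MIN, where Java defines
// INT_MIN / -1 == INT_MIN, matching NEGL); the remainder is 0 in
// both cases, so no division instruction is needed.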
3846
3847 if (instruction->IsRem()) {
3848 __ xorl(out_register, out_register);
3849 } else {
3850 __ movl(out_register, input_register);
3851 if (imm == -1) {
3852 __ negl(out_register);
3853 }
3854 }
3855 }
3856
3857 void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) {
3858 LocationSummary* locations = instruction->GetLocations();
3859 Location second = locations->InAt(1);
3860
3861 Register out = locations->Out().AsRegister<Register>();
3862 Register numerator = locations->InAt(0).AsRegister<Register>();
3863
3864 int32_t imm = Int64FromConstant(second.GetConstant());
3865 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3866 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
3867
3868 Register tmp = locations->GetTemp(0).AsRegister<Register>();
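// out = numerator & (abs_imm - 1) is the remainder for non-negative
// numerators. Java requires the remainder to take the sign of the
// dividend, so for a negative numerator with a non-zero remainder
// LEAL materializes out - abs_imm (note ~(abs_imm - 1) == -abs_imm)
// and CMOVL selects it when the numerator is negative.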
3869 NearLabel done;
3870 __ movl(out, numerator);
3871 __ andl(out, Immediate(abs_imm-1));
3872 __ j(Condition::kZero, &done);
3873 __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
3874 __ testl(numerator, numerator);
3875 __ cmovl(Condition::kLess, out, tmp);
3876 __ Bind(&done);
3877 }
3878
3879 void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
3880 LocationSummary* locations = instruction->GetLocations();
3881
3882 Register out_register = locations->Out().AsRegister<Register>();
3883 Register input_register = locations->InAt(0).AsRegister<Register>();
3884 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3885 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3886 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
3887
3888 Register num = locations->GetTemp(0).AsRegister<Register>();
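// An arithmetic shift alone rounds toward negative infinity; adding
// the bias (abs_imm - 1) to negative inputs first makes the shift
// round toward zero, as Java division requires. CMOVL keeps the
// unbiased value for non-negative inputs.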
3889
3890 __ leal(num, Address(input_register, abs_imm - 1));
3891 __ testl(input_register, input_register);
3892 __ cmovl(kGreaterEqual, num, input_register);
3893 int shift = CTZ(imm);
3894 __ sarl(num, Immediate(shift));
3895
3896 if (imm < 0) {
3897 __ negl(num);
3898 }
3899
3900 __ movl(out_register, num);
3901 }
3902
3903 void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3904 DCHECK(instruction->IsDiv() || instruction->IsRem());
3905
3906 LocationSummary* locations = instruction->GetLocations();
3907 int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3908
3909 Register eax = locations->InAt(0).AsRegister<Register>();
3910 Register out = locations->Out().AsRegister<Register>();
3911 Register num;
3912 Register edx;
3913
3914 if (instruction->IsDiv()) {
3915 edx = locations->GetTemp(0).AsRegister<Register>();
3916 num = locations->GetTemp(1).AsRegister<Register>();
3917 } else {
3918 edx = locations->Out().AsRegister<Register>();
3919 num = locations->GetTemp(0).AsRegister<Register>();
3920 }
3921
3922 DCHECK_EQ(EAX, eax);
3923 DCHECK_EQ(EDX, edx);
3924 if (instruction->IsDiv()) {
3925 DCHECK_EQ(EAX, out);
3926 } else {
3927 DCHECK_EQ(EDX, out);
3928 }
3929
3930 int64_t magic;
3931 int shift;
3932 CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
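// Classic division by multiplication with a precomputed "magic"
// constant (Granlund & Montgomery; Hacker's Delight): the quotient
// is the corrected, arithmetically shifted high half of
// magic * numerator, avoiding the much slower IDIV.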
3933
3934 // Save the numerator.
3935 __ movl(num, eax);
3936
3937 // EAX = magic
3938 __ movl(eax, Immediate(magic));
3939
3940 // EDX:EAX = magic * numerator
3941 __ imull(num);
3942
3943 if (imm > 0 && magic < 0) {
3944 // EDX += num
3945 __ addl(edx, num);
3946 } else if (imm < 0 && magic > 0) {
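// EDX -= num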
3947 __ subl(edx, num);
3948 }
3949
3950 // Shift if needed.
3951 if (shift != 0) {
3952 __ sarl(edx, Immediate(shift));
3953 }
3954
3955 // EDX += 1 if EDX < 0
3956 __ movl(eax, edx);
3957 __ shrl(edx, Immediate(31));
3958 __ addl(edx, eax);
3959
3960 if (instruction->IsRem()) {
3961 __ movl(eax, num);
3962 __ imull(edx, Immediate(imm));
3963 __ subl(eax, edx);
3964 __ movl(edx, eax);
3965 } else {
3966 __ movl(eax, edx);
3967 }
3968 }
3969
3970 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3971 DCHECK(instruction->IsDiv() || instruction->IsRem());
3972
3973 LocationSummary* locations = instruction->GetLocations();
3974 Location out = locations->Out();
3975 Location first = locations->InAt(0);
3976 Location second = locations->InAt(1);
3977 bool is_div = instruction->IsDiv();
3978
3979 switch (instruction->GetResultType()) {
3980 case DataType::Type::kInt32: {
3981 DCHECK_EQ(EAX, first.AsRegister<Register>());
3982 DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
3983
3984 if (second.IsConstant()) {
3985 int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
3986
3987 if (imm == 0) {
3988 // Do not generate anything for 0. DivZeroCheck would forbid any generated code.
3989 } else if (imm == 1 || imm == -1) {
3990 DivRemOneOrMinusOne(instruction);
3991 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
3992 if (is_div) {
3993 DivByPowerOfTwo(instruction->AsDiv());
3994 } else {
3995 RemByPowerOfTwo(instruction->AsRem());
3996 }
3997 } else {
3998 DCHECK(imm <= -2 || imm >= 2);
3999 GenerateDivRemWithAnyConstant(instruction);
4000 }
4001 } else {
4002 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86(
4003 instruction, out.AsRegister<Register>(), is_div);
4004 codegen_->AddSlowPath(slow_path);
4005
4006 Register second_reg = second.AsRegister<Register>();
4007 // 0x80000000/-1 triggers an arithmetic exception!
4008 // Dividing by -1 is actually negation and -0x80000000 = 0x80000000 so
4009 // it's safe to just use negl instead of more complex comparisons.
4010
4011 __ cmpl(second_reg, Immediate(-1));
4012 __ j(kEqual, slow_path->GetEntryLabel());
4013
4014 // edx:eax <- sign-extended of eax
4015 __ cdq();
4016 // eax = quotient, edx = remainder
4017 __ idivl(second_reg);
4018 __ Bind(slow_path->GetExitLabel());
4019 }
4020 break;
4021 }
4022
4023 case DataType::Type::kInt64: {
4024 InvokeRuntimeCallingConvention calling_convention;
4025 DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
4026 DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
4027 DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
4028 DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
4029 DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
4030 DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
4031
4032 if (is_div) {
4033 codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc());
4034 CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
4035 } else {
4036 codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc());
4037 CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
4038 }
4039 break;
4040 }
4041
4042 default:
4043 LOG(FATAL) << "Unexpected type for GenerateDivRemIntegral " << instruction->GetResultType();
4044 }
4045 }
4046
4047 void LocationsBuilderX86::VisitDiv(HDiv* div) {
4048 LocationSummary::CallKind call_kind = (div->GetResultType() == DataType::Type::kInt64)
4049 ? LocationSummary::kCallOnMainOnly
4050 : LocationSummary::kNoCall;
4051 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
4052
4053 switch (div->GetResultType()) {
4054 case DataType::Type::kInt32: {
4055 locations->SetInAt(0, Location::RegisterLocation(EAX));
4056 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
4057 locations->SetOut(Location::SameAsFirstInput());
4058 // Intel uses edx:eax as the dividend.
4059 locations->AddTemp(Location::RegisterLocation(EDX));
4060 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4061 // which enforces results to be in EAX and EDX, things are simpler if we use EAX also as
4062 // output and request another temp.
4063 if (div->InputAt(1)->IsIntConstant()) {
4064 locations->AddTemp(Location::RequiresRegister());
4065 }
4066 break;
4067 }
4068 case DataType::Type::kInt64: {
4069 InvokeRuntimeCallingConvention calling_convention;
4070 locations->SetInAt(0, Location::RegisterPairLocation(
4071 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4072 locations->SetInAt(1, Location::RegisterPairLocation(
4073 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4074 // Runtime helper puts the result in EAX, EDX.
4075 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
4076 break;
4077 }
4078 case DataType::Type::kFloat32:
4079 case DataType::Type::kFloat64: {
4080 locations->SetInAt(0, Location::RequiresFpuRegister());
4081 if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4082 DCHECK(div->InputAt(1)->IsEmittedAtUseSite());
4083 } else if (div->InputAt(1)->IsConstant()) {
4084 locations->SetInAt(1, Location::RequiresFpuRegister());
4085 } else {
4086 locations->SetInAt(1, Location::Any());
4087 }
4088 locations->SetOut(Location::SameAsFirstInput());
4089 break;
4090 }
4091
4092 default:
4093 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4094 }
4095 }
4096
4097 void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
4098 LocationSummary* locations = div->GetLocations();
4099 Location first = locations->InAt(0);
4100 Location second = locations->InAt(1);
4101
4102 switch (div->GetResultType()) {
4103 case DataType::Type::kInt32:
4104 case DataType::Type::kInt64: {
4105 GenerateDivRemIntegral(div);
4106 break;
4107 }
4108
4109 case DataType::Type::kFloat32: {
4110 if (second.IsFpuRegister()) {
4111 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4112 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4113 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
4114 DCHECK(const_area->IsEmittedAtUseSite());
4115 __ divss(first.AsFpuRegister<XmmRegister>(),
4116 codegen_->LiteralFloatAddress(
4117 const_area->GetConstant()->AsFloatConstant()->GetValue(),
4118 const_area->GetBaseMethodAddress(),
4119 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
4120 } else {
4121 DCHECK(second.IsStackSlot());
4122 __ divss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
4123 }
4124 break;
4125 }
4126
4127 case DataType::Type::kFloat64: {
4128 if (second.IsFpuRegister()) {
4129 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4130 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4131 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
4132 DCHECK(const_area->IsEmittedAtUseSite());
4133 __ divsd(first.AsFpuRegister<XmmRegister>(),
4134 codegen_->LiteralDoubleAddress(
4135 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
4136 const_area->GetBaseMethodAddress(),
4137 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
4138 } else {
4139 DCHECK(second.IsDoubleStackSlot());
4140 __ divsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
4141 }
4142 break;
4143 }
4144
4145 default:
4146 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4147 }
4148 }
4149
4150 void LocationsBuilderX86::VisitRem(HRem* rem) {
4151 DataType::Type type = rem->GetResultType();
4152
4153 LocationSummary::CallKind call_kind = (rem->GetResultType() == DataType::Type::kInt64)
4154 ? LocationSummary::kCallOnMainOnly
4155 : LocationSummary::kNoCall;
4156 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
4157
4158 switch (type) {
4159 case DataType::Type::kInt32: {
4160 locations->SetInAt(0, Location::RegisterLocation(EAX));
4161 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4162 locations->SetOut(Location::RegisterLocation(EDX));
4163 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4164 // which enforces results to be in EAX and EDX, things are simpler if we use EDX also as
4165 // output and request another temp.
4166 if (rem->InputAt(1)->IsIntConstant()) {
4167 locations->AddTemp(Location::RequiresRegister());
4168 }
4169 break;
4170 }
4171 case DataType::Type::kInt64: {
4172 InvokeRuntimeCallingConvention calling_convention;
4173 locations->SetInAt(0, Location::RegisterPairLocation(
4174 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4175 locations->SetInAt(1, Location::RegisterPairLocation(
4176 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4177 // Runtime helper puts the result in EAX, EDX.
4178 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
4179 break;
4180 }
4181 case DataType::Type::kFloat64:
4182 case DataType::Type::kFloat32: {
4183 locations->SetInAt(0, Location::Any());
4184 locations->SetInAt(1, Location::Any());
4185 locations->SetOut(Location::RequiresFpuRegister());
4186 locations->AddTemp(Location::RegisterLocation(EAX));
4187 break;
4188 }
4189
4190 default:
4191 LOG(FATAL) << "Unexpected rem type " << type;
4192 }
4193 }
4194
4195 void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
4196 DataType::Type type = rem->GetResultType();
4197 switch (type) {
4198 case DataType::Type::kInt32:
4199 case DataType::Type::kInt64: {
4200 GenerateDivRemIntegral(rem);
4201 break;
4202 }
4203 case DataType::Type::kFloat32:
4204 case DataType::Type::kFloat64: {
4205 GenerateRemFP(rem);
4206 break;
4207 }
4208 default:
4209 LOG(FATAL) << "Unexpected rem type " << type;
4210 }
4211 }
4212
4213 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4214 LocationSummary* locations = new (allocator) LocationSummary(minmax);
4215 switch (minmax->GetResultType()) {
4216 case DataType::Type::kInt32:
4217 locations->SetInAt(0, Location::RequiresRegister());
4218 locations->SetInAt(1, Location::RequiresRegister());
4219 locations->SetOut(Location::SameAsFirstInput());
4220 break;
4221 case DataType::Type::kInt64:
4222 locations->SetInAt(0, Location::RequiresRegister());
4223 locations->SetInAt(1, Location::RequiresRegister());
4224 locations->SetOut(Location::SameAsFirstInput());
4225 // Register to use to perform a long subtract to set cc.
4226 locations->AddTemp(Location::RequiresRegister());
4227 break;
4228 case DataType::Type::kFloat32:
4229 locations->SetInAt(0, Location::RequiresFpuRegister());
4230 locations->SetInAt(1, Location::RequiresFpuRegister());
4231 locations->SetOut(Location::SameAsFirstInput());
4232 locations->AddTemp(Location::RequiresRegister());
4233 break;
4234 case DataType::Type::kFloat64:
4235 locations->SetInAt(0, Location::RequiresFpuRegister());
4236 locations->SetInAt(1, Location::RequiresFpuRegister());
4237 locations->SetOut(Location::SameAsFirstInput());
4238 break;
4239 default:
4240 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4241 }
4242 }
4243
void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations,
                                                    bool is_min,
                                                    DataType::Type type) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    // a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  if (type == DataType::Type::kInt64) {
    // Need to perform a subtract to get the sign right.
    // op1 is already in the same location as the output.
    Location output = locations->Out();
    Register output_lo = output.AsRegisterPairLow<Register>();
    Register output_hi = output.AsRegisterPairHigh<Register>();

    Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
    Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();

    // The comparison is performed by subtracting the second operand from
    // the first operand and then setting the status flags in the same
    // manner as the SUB instruction.
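    // Illustrative sketch (comment only, not emitted code): the pair compare below
    // computes the flags of the full 64-bit subtraction (op1 - op2) without keeping
    // its result. Roughly, in C:
    //   uint32_t lo = out_lo - op2_lo;           // cmpl sets the borrow (CF)
    //   int32_t hi = out_hi - op2_hi - borrow;   // sbbl folds the borrow in
    // after which SF/OF describe the signed 64-bit comparison.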
    __ cmpl(output_lo, op2_lo);

    // Now use a temp and the borrow to finish the subtraction of op2_hi.
    Register temp = locations->GetTemp(0).AsRegister<Register>();
    __ movl(temp, output_hi);
    __ sbbl(temp, op2_hi);

    // Now the condition code is correct.
    Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
    __ cmovl(cond, output_lo, op2_lo);
    __ cmovl(cond, output_hi, op2_hi);
  } else {
    DCHECK_EQ(type, DataType::Type::kInt32);
    Register out = locations->Out().AsRegister<Register>();
    Register op2 = op2_loc.AsRegister<Register>();

    // (out := op1)
    // out <=? op2
    // if out is min jmp done
    // out := op2
    // done:

    __ cmpl(out, op2);
    Condition cond = is_min ? Condition::kGreater : Condition::kLess;
    __ cmovl(cond, out, op2);
  }
}

void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations,
                                                   bool is_min,
                                                   DataType::Type type) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  // (out := op1)
  // out <=? op2
  // if NaN jmp NaN_label
  // if out is min jmp done
  // if op2 is min jmp op2_label
  // handle -0/+0
  // jmp done
  // NaN_label:
  // out := NaN
  // op2_label:
  // out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  NearLabel nan, done, op2_label;
  if (type == DataType::Type::kFloat64) {
    __ ucomisd(out, op2);
  } else {
    DCHECK_EQ(type, DataType::Type::kFloat32);
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

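  // Note on signed zeros (illustration, not emitted code): min(+0.0, -0.0) must be
  // -0.0 and max(+0.0, -0.0) must be +0.0, but ucomis{s,d} reports the two as equal.
  // Since only the sign bit differs, OR-ing the bit patterns yields -0.0 (for min)
  // and AND-ing them yields +0.0 (for max), which is what the code below relies on.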
  // Handle 0.0/-0.0.
  if (is_min) {
    if (type == DataType::Type::kFloat64) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (type == DataType::Type::kFloat64) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (type == DataType::Type::kFloat64) {
    // TODO: Use a constant from the constant table (requires extra input).
    __ LoadLongConstant(out, kDoubleNaN);
  } else {
    Register constant = locations->GetTemp(0).AsRegister<Register>();
    __ movl(constant, Immediate(kFloatNaN));
    __ movd(out, constant);
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (type == DataType::Type::kFloat64) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
  DataType::Type type = minmax->GetResultType();
  switch (type) {
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
      break;
    default:
      LOG(FATAL) << "Unexpected type for HMinMax " << type;
  }
}

void LocationsBuilderX86::VisitMin(HMin* min) {
  CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
}

void InstructionCodeGeneratorX86::VisitMin(HMin* min) {
  GenerateMinMax(min, /*is_min*/ true);
}

void LocationsBuilderX86::VisitMax(HMax* max) {
  CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
}

void InstructionCodeGeneratorX86::VisitMax(HMax* max) {
  GenerateMinMax(max, /*is_min*/ false);
}

void LocationsBuilderX86::VisitAbs(HAbs* abs) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
  switch (abs->GetResultType()) {
    case DataType::Type::kInt32:
      locations->SetInAt(0, Location::RegisterLocation(EAX));
      locations->SetOut(Location::SameAsFirstInput());
      locations->AddTemp(Location::RegisterLocation(EDX));
      break;
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
      locations->AddTemp(Location::RequiresRegister());
      break;
    case DataType::Type::kFloat32:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      locations->AddTemp(Location::RequiresFpuRegister());
      locations->AddTemp(Location::RequiresRegister());
      break;
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      locations->AddTemp(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
  }
}

void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) {
  LocationSummary* locations = abs->GetLocations();
  switch (abs->GetResultType()) {
    case DataType::Type::kInt32: {
      Register out = locations->Out().AsRegister<Register>();
      DCHECK_EQ(out, EAX);
      Register temp = locations->GetTemp(0).AsRegister<Register>();
      DCHECK_EQ(temp, EDX);
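      // Branchless abs (illustration, not emitted code): with sign = x >> 31, which
      // is all ones for negative x and zero otherwise, |x| == (x ^ sign) - sign.
      // E.g. x = -5: sign = -1, x ^ sign = 4, 4 - (-1) = 5. As with Math.abs(),
      // INT32_MIN maps to itself. The same identity drives the 64-bit case below.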
      // Sign extend EAX into EDX.
      __ cdq();
      // XOR EAX with sign.
      __ xorl(EAX, EDX);
      // Subtract out sign to correct.
      __ subl(EAX, EDX);
      // The result is in EAX.
      break;
    }
    case DataType::Type::kInt64: {
      Location input = locations->InAt(0);
      Register input_lo = input.AsRegisterPairLow<Register>();
      Register input_hi = input.AsRegisterPairHigh<Register>();
      Location output = locations->Out();
      Register output_lo = output.AsRegisterPairLow<Register>();
      Register output_hi = output.AsRegisterPairHigh<Register>();
      Register temp = locations->GetTemp(0).AsRegister<Register>();
      // Compute the sign into the temporary.
      __ movl(temp, input_hi);
      __ sarl(temp, Immediate(31));
      // Store the sign into the output.
      __ movl(output_lo, temp);
      __ movl(output_hi, temp);
      // XOR the input to the output.
      __ xorl(output_lo, input_lo);
      __ xorl(output_hi, input_hi);
      // Subtract the sign.
      __ subl(output_lo, temp);
      __ sbbl(output_hi, temp);
      break;
    }
    case DataType::Type::kFloat32: {
      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
      XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
      Register constant = locations->GetTemp(1).AsRegister<Register>();
      __ movl(constant, Immediate(INT32_C(0x7FFFFFFF)));
      __ movd(temp, constant);
      __ andps(out, temp);
      break;
    }
    case DataType::Type::kFloat64: {
      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
      XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
      // TODO: Use a constant from the constant table (requires extra input).
      __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF));
      __ andpd(out, temp);
      break;
    }
    default:
      LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
  }
}

void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
  LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
  switch (instruction->GetType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32: {
      locations->SetInAt(0, Location::Any());
      break;
    }
    case DataType::Type::kInt64: {
      locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
      if (!instruction->IsConstant()) {
        locations->AddTemp(Location::RequiresRegister());
      }
      break;
    }
    default:
      LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
  }
}

void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
  SlowPathCode* slow_path =
      new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86(instruction);
  codegen_->AddSlowPath(slow_path);

  LocationSummary* locations = instruction->GetLocations();
  Location value = locations->InAt(0);

  switch (instruction->GetType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32: {
      if (value.IsRegister()) {
        __ testl(value.AsRegister<Register>(), value.AsRegister<Register>());
        __ j(kEqual, slow_path->GetEntryLabel());
      } else if (value.IsStackSlot()) {
        __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
        __ j(kEqual, slow_path->GetEntryLabel());
      } else {
        DCHECK(value.IsConstant()) << value;
        if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
          __ jmp(slow_path->GetEntryLabel());
        }
      }
      break;
    }
    case DataType::Type::kInt64: {
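      // A 64-bit value is zero iff (lo | hi) == 0, so OR-ing the two halves into a
      // temp both preserves the dividend and sets ZF exactly when it is zero.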
      if (value.IsRegisterPair()) {
        Register temp = locations->GetTemp(0).AsRegister<Register>();
        __ movl(temp, value.AsRegisterPairLow<Register>());
        __ orl(temp, value.AsRegisterPairHigh<Register>());
        __ j(kEqual, slow_path->GetEntryLabel());
      } else {
        DCHECK(value.IsConstant()) << value;
        if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
          __ jmp(slow_path->GetEntryLabel());
        }
      }
      break;
    }
    default:
      LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
  }
}

void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());

  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);

  switch (op->GetResultType()) {
    case DataType::Type::kInt32:
    case DataType::Type::kInt64: {
      // Can't have Location::Any() and output SameAsFirstInput()
      locations->SetInAt(0, Location::RequiresRegister());
      // The shift count needs to be in CL or a constant.
      locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
      locations->SetOut(Location::SameAsFirstInput());
      break;
    }
    default:
      LOG(FATAL) << "Unexpected op type " << op->GetResultType();
  }
}

void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());

  LocationSummary* locations = op->GetLocations();
  Location first = locations->InAt(0);
  Location second = locations->InAt(1);
  DCHECK(first.Equals(locations->Out()));

  switch (op->GetResultType()) {
    case DataType::Type::kInt32: {
      DCHECK(first.IsRegister());
      Register first_reg = first.AsRegister<Register>();
      if (second.IsRegister()) {
        Register second_reg = second.AsRegister<Register>();
        DCHECK_EQ(ECX, second_reg);
        if (op->IsShl()) {
          __ shll(first_reg, second_reg);
        } else if (op->IsShr()) {
          __ sarl(first_reg, second_reg);
        } else {
          __ shrl(first_reg, second_reg);
        }
      } else {
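        // Java masks shift distances to the width of the type (JLS 15.19), e.g.
        // (x << 37) == (x << 5) for int, so only the low 5 bits of the constant
        // (kMaxIntShiftDistance), or 6 bits for long (kMaxLongShiftDistance), matter.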
        int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance;
        if (shift == 0) {
          return;
        }
        Immediate imm(shift);
        if (op->IsShl()) {
          __ shll(first_reg, imm);
        } else if (op->IsShr()) {
          __ sarl(first_reg, imm);
        } else {
          __ shrl(first_reg, imm);
        }
      }
      break;
    }
    case DataType::Type::kInt64: {
      if (second.IsRegister()) {
        Register second_reg = second.AsRegister<Register>();
        DCHECK_EQ(ECX, second_reg);
        if (op->IsShl()) {
          GenerateShlLong(first, second_reg);
        } else if (op->IsShr()) {
          GenerateShrLong(first, second_reg);
        } else {
          GenerateUShrLong(first, second_reg);
        }
      } else {
        // Shift by a constant.
        int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
        // Nothing to do if the shift is 0, as the input is already the output.
        if (shift != 0) {
          if (op->IsShl()) {
            GenerateShlLong(first, shift);
          } else if (op->IsShr()) {
            GenerateShrLong(first, shift);
          } else {
            GenerateUShrLong(first, shift);
          }
        }
      }
      break;
    }
    default:
      LOG(FATAL) << "Unexpected op type " << op->GetResultType();
  }
}

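// Sketch of the 64-bit constant left shift on a 32-bit register pair (illustration
// only; `lo`/`hi` stand for the pair halves handled below):
//   if (shift >= 32) { hi = lo << (shift - 32); lo = 0; }
//   else             { hi = (hi << shift) | (lo >> (32 - shift)); lo <<= shift; }
// shld performs the (hi << shift) | (lo >> (32 - shift)) step in one instruction.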
void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
  Register low = loc.AsRegisterPairLow<Register>();
  Register high = loc.AsRegisterPairHigh<Register>();
  if (shift == 1) {
    // This is just an addition.
    __ addl(low, low);
    __ adcl(high, high);
  } else if (shift == 32) {
    // Shift by 32 is easy. High gets low, and low gets 0.
    codegen_->EmitParallelMoves(
        loc.ToLow(),
        loc.ToHigh(),
        DataType::Type::kInt32,
        Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
        loc.ToLow(),
        DataType::Type::kInt32);
  } else if (shift > 32) {
    // Low part becomes 0. High part is low part << (shift-32).
    __ movl(high, low);
    __ shll(high, Immediate(shift - 32));
    __ xorl(low, low);
  } else {
    // Between 1 and 31.
    __ shld(high, low, Immediate(shift));
    __ shll(low, Immediate(shift));
  }
}

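// For a variable count the hardware masks the count to 5 bits, so the shld/shll
// pair below is only correct for counts in [0, 31]. Testing bit 5 of the count and,
// when it is set, moving low into high and zeroing low patches up the [32, 63]
// range: the masked shift already left low << (count - 32) in the low half, which
// becomes the new high half. The shr/ushr variants further down use the same trick.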
void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
  NearLabel done;
  __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
  __ shll(loc.AsRegisterPairLow<Register>(), shifter);
  __ testl(shifter, Immediate(32));
  __ j(kEqual, &done);
  __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>());
  __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0));
  __ Bind(&done);
}

void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
  Register low = loc.AsRegisterPairLow<Register>();
  Register high = loc.AsRegisterPairHigh<Register>();
  if (shift == 32) {
    // Need to copy the sign.
    DCHECK_NE(low, high);
    __ movl(low, high);
    __ sarl(high, Immediate(31));
  } else if (shift > 32) {
    DCHECK_NE(low, high);
    // High part becomes sign. Low part is shifted by shift - 32.
    __ movl(low, high);
    __ sarl(high, Immediate(31));
    __ sarl(low, Immediate(shift - 32));
  } else {
    // Between 1 and 31.
    __ shrd(low, high, Immediate(shift));
    __ sarl(high, Immediate(shift));
  }
}

void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
  NearLabel done;
  __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
  __ sarl(loc.AsRegisterPairHigh<Register>(), shifter);
  __ testl(shifter, Immediate(32));
  __ j(kEqual, &done);
  __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
  __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31));
  __ Bind(&done);
}

void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
  Register low = loc.AsRegisterPairLow<Register>();
  Register high = loc.AsRegisterPairHigh<Register>();
  if (shift == 32) {
    // Shift by 32 is easy. Low gets high, and high gets 0.
    codegen_->EmitParallelMoves(
        loc.ToHigh(),
        loc.ToLow(),
        DataType::Type::kInt32,
        Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
        loc.ToHigh(),
        DataType::Type::kInt32);
  } else if (shift > 32) {
    // Low part is high >> (shift - 32). High part becomes 0.
    __ movl(low, high);
    __ shrl(low, Immediate(shift - 32));
    __ xorl(high, high);
  } else {
    // Between 1 and 31.
    __ shrd(low, high, Immediate(shift));
    __ shrl(high, Immediate(shift));
  }
}

void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
  NearLabel done;
  __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
  __ shrl(loc.AsRegisterPairHigh<Register>(), shifter);
  __ testl(shifter, Immediate(32));
  __ j(kEqual, &done);
  __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
  __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0));
  __ Bind(&done);
}

void LocationsBuilderX86::VisitRor(HRor* ror) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);

  switch (ror->GetResultType()) {
    case DataType::Type::kInt64:
      // Add the temporary needed.
      locations->AddTemp(Location::RequiresRegister());
      FALLTHROUGH_INTENDED;
    case DataType::Type::kInt32:
      locations->SetInAt(0, Location::RequiresRegister());
      // The shift count needs to be in CL (unless it is a constant).
      locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1)));
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
  LocationSummary* locations = ror->GetLocations();
  Location first = locations->InAt(0);
  Location second = locations->InAt(1);

  if (ror->GetResultType() == DataType::Type::kInt32) {
    Register first_reg = first.AsRegister<Register>();
    if (second.IsRegister()) {
      Register second_reg = second.AsRegister<Register>();
      __ rorl(first_reg, second_reg);
    } else {
      Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
      __ rorl(first_reg, imm);
    }
    return;
  }

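  // 64-bit rotate sketch (illustration only): for n in [1, 31],
  //   lo' = (lo >> n) | (hi << (32 - n)) and hi' = (hi >> n) | (lo << (32 - n)),
  // i.e. two shrd's where each half is fed from the other (the temp keeps an
  // original half alive). A rotate by 32 is a plain swap, and n in [33, 63] is the
  // (n - 32) rotate followed by a swap; the variable-count path below gets the same
  // effect with cmov's predicated on bit 5 of the count.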
  DCHECK_EQ(ror->GetResultType(), DataType::Type::kInt64);
  Register first_reg_lo = first.AsRegisterPairLow<Register>();
  Register first_reg_hi = first.AsRegisterPairHigh<Register>();
  Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
  if (second.IsRegister()) {
    Register second_reg = second.AsRegister<Register>();
    DCHECK_EQ(second_reg, ECX);
    __ movl(temp_reg, first_reg_hi);
    __ shrd(first_reg_hi, first_reg_lo, second_reg);
    __ shrd(first_reg_lo, temp_reg, second_reg);
    __ movl(temp_reg, first_reg_hi);
    __ testl(second_reg, Immediate(32));
    __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
    __ cmovl(kNotEqual, first_reg_lo, temp_reg);
  } else {
    int32_t shift_amt = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
    if (shift_amt == 0) {
      // Already fine.
      return;
    }
    if (shift_amt == 32) {
      // Just swap.
      __ movl(temp_reg, first_reg_lo);
      __ movl(first_reg_lo, first_reg_hi);
      __ movl(first_reg_hi, temp_reg);
      return;
    }

    Immediate imm(shift_amt);
    // Save the contents of the low value.
    __ movl(temp_reg, first_reg_lo);

    // Shift right into low, feeding bits from high.
    __ shrd(first_reg_lo, first_reg_hi, imm);

    // Shift right into high, feeding bits from the original low.
    __ shrd(first_reg_hi, temp_reg, imm);

    // Swap if needed.
    if (shift_amt > 32) {
      __ movl(temp_reg, first_reg_lo);
      __ movl(first_reg_lo, first_reg_hi);
      __ movl(first_reg_hi, temp_reg);
    }
  }
}

void LocationsBuilderX86::VisitShl(HShl* shl) {
  HandleShift(shl);
}

void InstructionCodeGeneratorX86::VisitShl(HShl* shl) {
  HandleShift(shl);
}

void LocationsBuilderX86::VisitShr(HShr* shr) {
  HandleShift(shr);
}

void InstructionCodeGeneratorX86::VisitShr(HShr* shr) {
  HandleShift(shr);
}

void LocationsBuilderX86::VisitUShr(HUShr* ushr) {
  HandleShift(ushr);
}

void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
  HandleShift(ushr);
}

void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
      instruction, LocationSummary::kCallOnMainOnly);
  locations->SetOut(Location::RegisterLocation(EAX));
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}

void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
  codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
  CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
  DCHECK(!codegen_->IsLeafMethod());
}

void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
      instruction, LocationSummary::kCallOnMainOnly);
  locations->SetOut(Location::RegisterLocation(EAX));
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
}

void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
  // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
  QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
  codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
  CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
  DCHECK(!codegen_->IsLeafMethod());
}

void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
  Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
  if (location.IsStackSlot()) {
    location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
  } else if (location.IsDoubleStackSlot()) {
    location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
  }
  locations->SetOut(location);
}

void InstructionCodeGeneratorX86::VisitParameterValue(
    HParameterValue* instruction ATTRIBUTE_UNUSED) {
}

void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
  locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
}

void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
}

void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
        instruction->GetIndex(), kX86PointerSize).SizeValue();
    __ movl(locations->Out().AsRegister<Register>(),
            Address(locations->InAt(0).AsRegister<Register>(), method_offset));
  } else {
    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
        instruction->GetIndex(), kX86PointerSize));
    __ movl(locations->Out().AsRegister<Register>(),
            Address(locations->InAt(0).AsRegister<Register>(),
                    mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
    // temp = temp->GetImtEntryAt(method_offset);
    __ movl(locations->Out().AsRegister<Register>(),
            Address(locations->Out().AsRegister<Register>(), method_offset));
  }
}

void LocationsBuilderX86::VisitNot(HNot* not_) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void InstructionCodeGeneratorX86::VisitNot(HNot* not_) {
  LocationSummary* locations = not_->GetLocations();
  Location in = locations->InAt(0);
  Location out = locations->Out();
  DCHECK(in.Equals(out));
  switch (not_->GetResultType()) {
    case DataType::Type::kInt32:
      __ notl(out.AsRegister<Register>());
      break;

    case DataType::Type::kInt64:
      __ notl(out.AsRegisterPairLow<Register>());
      __ notl(out.AsRegisterPairHigh<Register>());
      break;

    default:
      LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
  }
}

void LocationsBuilderX86::VisitBooleanNot(HBooleanNot* bool_not) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) {
  LocationSummary* locations = bool_not->GetLocations();
  Location in = locations->InAt(0);
  Location out = locations->Out();
  DCHECK(in.Equals(out));
  __ xorl(out.AsRegister<Register>(), Immediate(1));
}

void LocationsBuilderX86::VisitCompare(HCompare* compare) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
  switch (compare->InputAt(0)->GetType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64: {
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::Any());
      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
      break;
    }
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64: {
      locations->SetInAt(0, Location::RequiresFpuRegister());
      if (compare->InputAt(1)->IsX86LoadFromConstantTable()) {
        DCHECK(compare->InputAt(1)->IsEmittedAtUseSite());
      } else if (compare->InputAt(1)->IsConstant()) {
        locations->SetInAt(1, Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(1, Location::Any());
      }
      locations->SetOut(Location::RequiresRegister());
      break;
    }
    default:
      LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
  }
}

void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
  LocationSummary* locations = compare->GetLocations();
  Register out = locations->Out().AsRegister<Register>();
  Location left = locations->InAt(0);
  Location right = locations->InAt(1);

  NearLabel less, greater, done;
  Condition less_cond = kLess;

  switch (compare->InputAt(0)->GetType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32: {
      codegen_->GenerateIntCompare(left, right);
      break;
    }
    case DataType::Type::kInt64: {
      Register left_low = left.AsRegisterPairLow<Register>();
      Register left_high = left.AsRegisterPairHigh<Register>();
      int32_t val_low = 0;
      int32_t val_high = 0;
      bool right_is_const = false;

      if (right.IsConstant()) {
        DCHECK(right.GetConstant()->IsLongConstant());
        right_is_const = true;
        int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
        val_low = Low32Bits(val);
        val_high = High32Bits(val);
      }

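      // Lexicographic 64-bit compare: the signed comparison of the high words
      // decides unless they are equal, in which case the low words are compared as
      // unsigned values (hence less_cond is switched to kBelow further down).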
      if (right.IsRegisterPair()) {
        __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
      } else if (right.IsDoubleStackSlot()) {
        __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
      } else {
        DCHECK(right_is_const) << right;
        codegen_->Compare32BitValue(left_high, val_high);
      }
      __ j(kLess, &less);  // Signed compare.
      __ j(kGreater, &greater);  // Signed compare.
      if (right.IsRegisterPair()) {
        __ cmpl(left_low, right.AsRegisterPairLow<Register>());
      } else if (right.IsDoubleStackSlot()) {
        __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
      } else {
        DCHECK(right_is_const) << right;
        codegen_->Compare32BitValue(left_low, val_low);
      }
      less_cond = kBelow;  // for CF (unsigned).
      break;
    }
    case DataType::Type::kFloat32: {
      GenerateFPCompare(left, right, compare, false);
      __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
      less_cond = kBelow;  // for CF (floats).
      break;
    }
    case DataType::Type::kFloat64: {
      GenerateFPCompare(left, right, compare, true);
      __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
      less_cond = kBelow;  // for CF (floats).
      break;
    }
    default:
      LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
  }

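  // Materialize the three-way result from the flags: start with 0 (equal), then
  // branch to the stores of 1 (greater, reached by fallthrough) or -1 (less);
  // less_cond selects the signed or unsigned "less" flag matching the compare above.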
  __ movl(out, Immediate(0));
  __ j(kEqual, &done);
  __ j(less_cond, &less);

  __ Bind(&greater);
  __ movl(out, Immediate(1));
  __ jmp(&done);

  __ Bind(&less);
  __ movl(out, Immediate(-1));

  __ Bind(&done);
}

void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
    locations->SetInAt(i, Location::Any());
  }
  locations->SetOut(Location::Any());
}

void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unreachable";
}

void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
  /*
   * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
   * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
   * For those cases, all we need to ensure is that there is a scheduling barrier in place.
   */
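  // Illustration (assumption about MemoryFence(), which is defined elsewhere): on
  // x86 a StoreLoad fence is typically realized either as `mfence` or as a locked
  // RMW on a stack slot such as `lock addl $0, (%esp)`; either drains the store
  // buffer before subsequent loads.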
  switch (kind) {
    case MemBarrierKind::kAnyAny: {
      MemoryFence();
      break;
    }
    case MemBarrierKind::kAnyStore:
    case MemBarrierKind::kLoadAny:
    case MemBarrierKind::kStoreStore: {
      // nop
      break;
    }
    case MemBarrierKind::kNTStoreStore:
      // Non-Temporal Store/Store needs an explicit fence.
      MemoryFence(/* non-temporal= */ true);
      break;
  }
}

HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
    const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
    ArtMethod* method ATTRIBUTE_UNUSED) {
  return desired_dispatch_info;
}

Register CodeGeneratorX86::GetInvokeExtraParameter(HInvoke* invoke, Register temp) {
  if (invoke->IsInvokeStaticOrDirect()) {
    return GetInvokeStaticOrDirectExtraParameter(invoke->AsInvokeStaticOrDirect(), temp);
  }
  DCHECK(invoke->IsInvokeInterface());
  Location location =
      invoke->GetLocations()->InAt(invoke->AsInvokeInterface()->GetSpecialInputIndex());
  return location.AsRegister<Register>();
}

Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
                                                                 Register temp) {
  Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
  if (!invoke->GetLocations()->Intrinsified()) {
    return location.AsRegister<Register>();
  }
  // For intrinsics we allow any location, so it may be on the stack.
  if (!location.IsRegister()) {
    __ movl(temp, Address(ESP, location.GetStackIndex()));
    return temp;
  }
  // For register locations, check if the register was saved. If so, get it from the stack.
  // Note: There is a chance that the register was saved but not overwritten, so we could
  // save one load. However, since this is just an intrinsic slow path we prefer this
  // simple and more robust approach rather than trying to determine if that's the case.
  SlowPathCode* slow_path = GetCurrentSlowPath();
  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
    __ movl(temp, Address(ESP, stack_offset));
    return temp;
  }
  return location.AsRegister<Register>();
}

void CodeGeneratorX86::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
  switch (load_kind) {
    case MethodLoadKind::kBootImageLinkTimePcRelative: {
      DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
      Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
      __ leal(temp.AsRegister<Register>(),
              Address(base_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
      RecordBootImageMethodPatch(invoke);
      break;
    }
    case MethodLoadKind::kBootImageRelRo: {
      size_t index = invoke->IsInvokeInterface()
          ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
          : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
      Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
      __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
      RecordBootImageRelRoPatch(
          invoke->InputAt(index)->AsX86ComputeBaseMethodAddress(),
          GetBootImageOffset(invoke));
      break;
    }
    case MethodLoadKind::kBssEntry: {
      Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
      __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
      RecordMethodBssEntryPatch(invoke);
      // No need for memory fence, thanks to the x86 memory model.
      break;
    }
    case MethodLoadKind::kJitDirectAddress: {
      __ movl(temp.AsRegister<Register>(),
              Immediate(reinterpret_cast32<uint32_t>(invoke->GetResolvedMethod())));
      break;
    }
    case MethodLoadKind::kRuntimeCall: {
      // Test situation, don't do anything.
      break;
    }
    default: {
      LOG(FATAL) << "Load kind should have already been handled " << load_kind;
      UNREACHABLE();
    }
  }
}

void CodeGeneratorX86::GenerateStaticOrDirectCall(
    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
  switch (invoke->GetMethodLoadKind()) {
    case MethodLoadKind::kStringInit: {
      // temp = thread->string_init_entrypoint
      uint32_t offset =
          GetThreadOffset<kX86PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
      __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(offset));
      break;
    }
    case MethodLoadKind::kRecursive: {
      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
      break;
    }
    case MethodLoadKind::kRuntimeCall: {
      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
      return;  // No code pointer retrieval; the runtime performs the call directly.
    }
    case MethodLoadKind::kBootImageLinkTimePcRelative:
      // For kCallCriticalNative we skip loading the method and do the call directly.
      if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
        break;
      }
      FALLTHROUGH_INTENDED;
    default: {
      LoadMethod(invoke->GetMethodLoadKind(), callee_method, invoke);
    }
  }

  switch (invoke->GetCodePtrLocation()) {
    case CodePtrLocation::kCallSelf:
      __ call(GetFrameEntryLabel());
      RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
      break;
    case CodePtrLocation::kCallCriticalNative: {
      size_t out_frame_size =
          PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86,
                                    kNativeStackAlignment,
                                    GetCriticalNativeDirectCallFrameSize>(invoke);
      if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
        DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
        Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
        __ call(Address(base_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
        RecordBootImageJniEntrypointPatch(invoke);
      } else {
        // (callee_method + offset_of_jni_entry_point)()
        __ call(Address(callee_method.AsRegister<Register>(),
                        ArtMethod::EntryPointFromJniOffset(kX86PointerSize).Int32Value()));
      }
      RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
      if (out_frame_size == 0u && DataType::IsFloatingPointType(invoke->GetType())) {
        // Create space for conversion.
        out_frame_size = 8u;
        IncreaseFrame(out_frame_size);
      }
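      // The 32-bit native ABI returns floating-point values in st(0) while the
      // managed ABI expects them in XMM0, so FP results are bounced through the
      // stack slot reserved above (the fstp + movs{s,d} pairs below).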
      // Zero-/sign-extend or move the result when needed due to native and managed ABI mismatch.
      switch (invoke->GetType()) {
        case DataType::Type::kBool:
          __ movzxb(EAX, AL);
          break;
        case DataType::Type::kInt8:
          __ movsxb(EAX, AL);
          break;
        case DataType::Type::kUint16:
          __ movzxw(EAX, EAX);
          break;
        case DataType::Type::kInt16:
          __ movsxw(EAX, EAX);
          break;
        case DataType::Type::kFloat32:
          __ fstps(Address(ESP, 0));
          __ movss(XMM0, Address(ESP, 0));
          break;
        case DataType::Type::kFloat64:
          __ fstpl(Address(ESP, 0));
          __ movsd(XMM0, Address(ESP, 0));
          break;
        case DataType::Type::kInt32:
        case DataType::Type::kInt64:
        case DataType::Type::kVoid:
          break;
        default:
          DCHECK(false) << invoke->GetType();
          break;
      }
      if (out_frame_size != 0u) {
        DecreaseFrame(out_frame_size);
      }
      break;
    }
    case CodePtrLocation::kCallArtMethod:
      // (callee_method + offset_of_quick_compiled_code)()
      __ call(Address(callee_method.AsRegister<Register>(),
                      ArtMethod::EntryPointFromQuickCompiledCodeOffset(
                          kX86PointerSize).Int32Value()));
      RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
      break;
  }

  DCHECK(!IsLeafMethod());
}

void CodeGeneratorX86::GenerateVirtualCall(
    HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
  Register temp = temp_in.AsRegister<Register>();
  uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
      invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();

  // Use the calling convention instead of the location of the receiver, as
  // intrinsics may have put the receiver in a different register. In the intrinsics
  // slow path, the arguments have been moved to the right place, so here we are
  // guaranteed that the receiver is the first register of the calling convention.
  InvokeDexCallingConvention calling_convention;
  Register receiver = calling_convention.GetRegisterAt(0);
  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  // /* HeapReference<Class> */ temp = receiver->klass_
  __ movl(temp, Address(receiver, class_offset));
  MaybeRecordImplicitNullCheck(invoke);
  // Instead of simply (possibly) unpoisoning `temp` here, we should
  // emit a read barrier for the previous class reference load.
  // However this is not required in practice, as this is an
  // intermediate/temporary reference and because the current
  // concurrent copying collector keeps the from-space memory
  // intact/accessible until the end of the marking phase (the
  // concurrent copying collector may not in the future).
  __ MaybeUnpoisonHeapReference(temp);

  MaybeGenerateInlineCacheCheck(invoke, temp);

  // temp = temp->GetMethodAt(method_offset);
  __ movl(temp, Address(temp, method_offset));
  // call temp->GetEntryPoint();
  __ call(Address(
      temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
}

void CodeGeneratorX86::RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address,
                                                     uint32_t intrinsic_data) {
  boot_image_other_patches_.emplace_back(
      method_address, /* target_dex_file= */ nullptr, intrinsic_data);
  __ Bind(&boot_image_other_patches_.back().label);
}

void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
                                                 uint32_t boot_image_offset) {
  boot_image_other_patches_.emplace_back(
      method_address, /* target_dex_file= */ nullptr, boot_image_offset);
  __ Bind(&boot_image_other_patches_.back().label);
}

void CodeGeneratorX86::RecordBootImageMethodPatch(HInvoke* invoke) {
  size_t index = invoke->IsInvokeInterface()
      ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
      : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
  HX86ComputeBaseMethodAddress* method_address =
      invoke->InputAt(index)->AsX86ComputeBaseMethodAddress();
  boot_image_method_patches_.emplace_back(
      method_address,
      invoke->GetResolvedMethodReference().dex_file,
      invoke->GetResolvedMethodReference().index);
  __ Bind(&boot_image_method_patches_.back().label);
}

void CodeGeneratorX86::RecordMethodBssEntryPatch(HInvoke* invoke) {
  size_t index = invoke->IsInvokeInterface()
      ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
      : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
  DCHECK(IsSameDexFile(GetGraph()->GetDexFile(), *invoke->GetMethodReference().dex_file));
  HX86ComputeBaseMethodAddress* method_address =
      invoke->InputAt(index)->AsX86ComputeBaseMethodAddress();
  // Add the patch entry and bind its label at the end of the instruction.
  method_bss_entry_patches_.emplace_back(
      method_address,
      invoke->GetMethodReference().dex_file,
      invoke->GetMethodReference().index);
  __ Bind(&method_bss_entry_patches_.back().label);
}

void CodeGeneratorX86::RecordBootImageTypePatch(HLoadClass* load_class) {
  HX86ComputeBaseMethodAddress* method_address =
      load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
  boot_image_type_patches_.emplace_back(
      method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
  __ Bind(&boot_image_type_patches_.back().label);
}

Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) {
  HX86ComputeBaseMethodAddress* method_address =
      load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
  ArenaDeque<X86PcRelativePatchInfo>* patches = nullptr;
  switch (load_class->GetLoadKind()) {
    case HLoadClass::LoadKind::kBssEntry:
      patches = &type_bss_entry_patches_;
      break;
    case HLoadClass::LoadKind::kBssEntryPublic:
      patches = &public_type_bss_entry_patches_;
      break;
    case HLoadClass::LoadKind::kBssEntryPackage:
      patches = &package_type_bss_entry_patches_;
      break;
    default:
      LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
      UNREACHABLE();
  }
  patches->emplace_back(
      method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
  return &patches->back().label;
}

void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) {
  HX86ComputeBaseMethodAddress* method_address =
      load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
  boot_image_string_patches_.emplace_back(
      method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
  __ Bind(&boot_image_string_patches_.back().label);
}

Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
  HX86ComputeBaseMethodAddress* method_address =
      load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
  string_bss_entry_patches_.emplace_back(
      method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
  return &string_bss_entry_patches_.back().label;
}

void CodeGeneratorX86::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
  HX86ComputeBaseMethodAddress* method_address =
      invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
  boot_image_jni_entrypoint_patches_.emplace_back(
      method_address,
      invoke->GetResolvedMethodReference().dex_file,
      invoke->GetResolvedMethodReference().index);
  __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
}

void CodeGeneratorX86::LoadBootImageAddress(Register reg,
                                            uint32_t boot_image_reference,
                                            HInvokeStaticOrDirect* invoke) {
  if (GetCompilerOptions().IsBootImage()) {
    HX86ComputeBaseMethodAddress* method_address =
        invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
    DCHECK(method_address != nullptr);
    Register method_address_reg =
        invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
    __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
    RecordBootImageIntrinsicPatch(method_address, boot_image_reference);
  } else if (GetCompilerOptions().GetCompilePic()) {
    HX86ComputeBaseMethodAddress* method_address =
        invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
    DCHECK(method_address != nullptr);
    Register method_address_reg =
        invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
    __ movl(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
    RecordBootImageRelRoPatch(method_address, boot_image_reference);
  } else {
    DCHECK(GetCompilerOptions().IsJitCompiler());
    gc::Heap* heap = Runtime::Current()->GetHeap();
    DCHECK(!heap->GetBootImageSpaces().empty());
    const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
    __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
  }
}

void CodeGeneratorX86::LoadIntrinsicDeclaringClass(Register reg, HInvokeStaticOrDirect* invoke) {
  DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
  if (GetCompilerOptions().IsBootImage()) {
    // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
    HX86ComputeBaseMethodAddress* method_address =
        invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
    DCHECK(method_address != nullptr);
    Register method_address_reg =
        invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
    __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
    MethodReference target_method = invoke->GetResolvedMethodReference();
    dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
    boot_image_type_patches_.emplace_back(method_address, target_method.dex_file, type_idx.index_);
    __ Bind(&boot_image_type_patches_.back().label);
  } else {
    uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
    LoadBootImageAddress(reg, boot_image_offset, invoke);
  }
}

// The label points to the end of the "movl" or another instruction but the literal offset
// for the method patch needs to point to the embedded constant which occupies the last 4 bytes.
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;

template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
    const ArenaDeque<X86PcRelativePatchInfo>& infos,
    ArenaVector<linker::LinkerPatch>* linker_patches) {
  for (const X86PcRelativePatchInfo& info : infos) {
    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    linker_patches->push_back(Factory(literal_offset,
                                      info.target_dex_file,
                                      GetMethodAddressOffset(info.method_address),
                                      info.offset_or_index));
  }
}

template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
                                     const DexFile* target_dex_file,
                                     uint32_t pc_insn_offset,
                                     uint32_t boot_image_offset) {
  DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
  return Factory(literal_offset, pc_insn_offset, boot_image_offset);
}

void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
  DCHECK(linker_patches->empty());
  size_t size =
      boot_image_method_patches_.size() +
      method_bss_entry_patches_.size() +
      boot_image_type_patches_.size() +
      type_bss_entry_patches_.size() +
      public_type_bss_entry_patches_.size() +
      package_type_bss_entry_patches_.size() +
      boot_image_string_patches_.size() +
      string_bss_entry_patches_.size() +
      boot_image_jni_entrypoint_patches_.size() +
      boot_image_other_patches_.size();
  linker_patches->reserve(size);
  if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
    EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
        boot_image_method_patches_, linker_patches);
    EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
        boot_image_type_patches_, linker_patches);
    EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
        boot_image_string_patches_, linker_patches);
  } else {
    DCHECK(boot_image_method_patches_.empty());
    DCHECK(boot_image_type_patches_.empty());
    DCHECK(boot_image_string_patches_.empty());
  }
  if (GetCompilerOptions().IsBootImage()) {
    EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
        boot_image_other_patches_, linker_patches);
  } else {
    EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
        boot_image_other_patches_, linker_patches);
  }
  EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
      method_bss_entry_patches_, linker_patches);
  EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
      type_bss_entry_patches_, linker_patches);
  EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
      public_type_bss_entry_patches_, linker_patches);
  EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
      package_type_bss_entry_patches_, linker_patches);
  EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
      string_bss_entry_patches_, linker_patches);
  EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
      boot_image_jni_entrypoint_patches_, linker_patches);
  DCHECK_EQ(size, linker_patches->size());
}

5610 void CodeGeneratorX86::MarkGCCard(Register temp,
5611 Register card,
5612 Register object,
5613 Register value,
5614 bool value_can_be_null) {
5615 NearLabel is_null;
5616 if (value_can_be_null) {
5617 __ testl(value, value);
5618 __ j(kEqual, &is_null);
5619 }
5620 // Load the address of the card table into `card`.
5621 __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
5622 // Calculate the offset (in the card table) of the card corresponding to
5623 // `object`.
5624 __ movl(temp, object);
5625 __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
5626 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5627 // `object`'s card.
5628 //
5629 // Register `card` contains the address of the card table. Note that the card
5630 // table's base is biased during its creation so that it always starts at an
5631 // address whose least-significant byte is equal to `kCardDirty` (see
5632 // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
5633 // below writes the `kCardDirty` (byte) value into the `object`'s card
5634 // (located at `card + object >> kCardShift`).
5635 //
5636 // This dual use of the value in register `card` (1. to calculate the location
5637 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
5638 // (no need to explicitly load `kCardDirty` as an immediate value).
5639 __ movb(Address(temp, card, TIMES_1, 0),
5640 X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
5641 if (value_can_be_null) {
5642 __ Bind(&is_null);
5643 }
5644 }
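
// --- Editor's sketch (not part of the original file) ---
// The dirtying sequence above, modeled in plain C++. `kCardShiftSketch`
// stands in for gc::accounting::CardTable::kCardShift (assumed to be 10,
// i.e. 1 KiB cards), and the table base is assumed pre-biased so that its
// low byte equals kCardDirty, as described in the comment above.
#include <cstdint>

static constexpr unsigned kCardShiftSketch = 10;  // assumption for this sketch

static inline void MarkCardSketch(uint8_t* biased_card_base, uintptr_t object) {
  // Reusing the low byte of the (biased) base pointer as the kCardDirty value
  // is exactly the trick that lets the movb above avoid a separate immediate.
  uint8_t dirty = static_cast<uint8_t>(reinterpret_cast<uintptr_t>(biased_card_base));
  biased_card_base[object >> kCardShiftSketch] = dirty;
}
// --------------------------------------------------------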
5645
5646 void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
5647 DCHECK(instruction->IsInstanceFieldGet() ||
5648 instruction->IsStaticFieldGet() ||
5649 instruction->IsPredicatedInstanceFieldGet());
5650
5651 bool object_field_get_with_read_barrier =
5652 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5653 bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
5654 LocationSummary* locations =
5655 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5656 kEmitCompilerReadBarrier
5657 ? LocationSummary::kCallOnSlowPath
5658 : LocationSummary::kNoCall);
5659 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5660 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5661 }
5662 // receiver_input
5663 locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister());
5664 if (is_predicated) {
5665 if (DataType::IsFloatingPointType(instruction->GetType())) {
5666 locations->SetInAt(0, Location::RequiresFpuRegister());
5667 } else {
5668 locations->SetInAt(0, Location::RequiresRegister());
5669 }
5670 }
5671 if (DataType::IsFloatingPointType(instruction->GetType())) {
5672 locations->SetOut(is_predicated ? Location::SameAsFirstInput()
5673 : Location::RequiresFpuRegister());
5674 } else {
5675 // The output overlaps in case of long: we don't want the low move
5676 // to overwrite the object's location. Likewise, in the case of
5677 // an object field get with read barriers enabled, we do not want
5678 // the move to overwrite the object's location, as we need it to emit
5679 // the read barrier.
5680 locations->SetOut(is_predicated ? Location::SameAsFirstInput() : Location::RequiresRegister(),
5681 (object_field_get_with_read_barrier ||
5682 instruction->GetType() == DataType::Type::kInt64 ||
5683 is_predicated)
5684 ? Location::kOutputOverlap
5685 : Location::kNoOutputOverlap);
5686 }
5687
5688 if (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) {
5689 // Long values can be loaded atomically into an XMM register using movsd.
5690 // So we use an XMM register as a temp to achieve atomicity (first
5691 // load the value into the XMM and then copy it to the output
5692 // register pair, 32 bits at a time).
5693 locations->AddTemp(Location::RequiresFpuRegister());
5694 }
5695 }
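
// --- Editor's sketch (not part of the original file) ---
// The movsd/movd/psrlq pattern described in the comment above (and emitted in
// HandleFieldGet below), expressed with SSE2 intrinsics. Like the generated
// code, it assumes an aligned 8-byte SSE access is single-copy atomic on the
// targeted CPUs.
#include <emmintrin.h>
#include <cstdint>

static inline void AtomicLoad64Sketch(const void* addr, uint32_t* lo, uint32_t* hi) {
  __m128i v = _mm_loadl_epi64(static_cast<const __m128i*>(addr));  // one 8-byte load
  *lo = static_cast<uint32_t>(_mm_cvtsi128_si32(v));               // movd (low half)
  v = _mm_srli_epi64(v, 32);                                       // psrlq $32
  *hi = static_cast<uint32_t>(_mm_cvtsi128_si32(v));               // movd (high half)
}
// --------------------------------------------------------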
5696
5697 void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
5698 const FieldInfo& field_info) {
5699 DCHECK(instruction->IsInstanceFieldGet() ||
5700 instruction->IsStaticFieldGet() ||
5701 instruction->IsPredicatedInstanceFieldGet());
5702
5703 LocationSummary* locations = instruction->GetLocations();
5704 Location base_loc = locations->InAt(instruction->IsPredicatedInstanceFieldGet() ? 1 : 0);
5705 Register base = base_loc.AsRegister<Register>();
5706 Location out = locations->Out();
5707 bool is_volatile = field_info.IsVolatile();
5708 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
5709 DataType::Type load_type = instruction->GetType();
5710 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5711
5712 switch (load_type) {
5713 case DataType::Type::kBool:
5714 case DataType::Type::kUint8: {
5715 __ movzxb(out.AsRegister<Register>(), Address(base, offset));
5716 break;
5717 }
5718
5719 case DataType::Type::kInt8: {
5720 __ movsxb(out.AsRegister<Register>(), Address(base, offset));
5721 break;
5722 }
5723
5724 case DataType::Type::kUint16: {
5725 __ movzxw(out.AsRegister<Register>(), Address(base, offset));
5726 break;
5727 }
5728
5729 case DataType::Type::kInt16: {
5730 __ movsxw(out.AsRegister<Register>(), Address(base, offset));
5731 break;
5732 }
5733
5734 case DataType::Type::kInt32:
5735 __ movl(out.AsRegister<Register>(), Address(base, offset));
5736 break;
5737
5738 case DataType::Type::kReference: {
5739 // /* HeapReference<Object> */ out = *(base + offset)
5740 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5741 // Note that a potential implicit null check is handled in this
5742 // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
5743 codegen_->GenerateFieldLoadWithBakerReadBarrier(
5744 instruction, out, base, offset, /* needs_null_check= */ true);
5745 if (is_volatile) {
5746 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5747 }
5748 } else {
5749 __ movl(out.AsRegister<Register>(), Address(base, offset));
5750 codegen_->MaybeRecordImplicitNullCheck(instruction);
5751 if (is_volatile) {
5752 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5753 }
5754 // If read barriers are enabled, emit read barriers other than
5755 // Baker's using a slow path (and also unpoison the loaded
5756 // reference, if heap poisoning is enabled).
5757 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
5758 }
5759 break;
5760 }
5761
5762 case DataType::Type::kInt64: {
5763 if (is_volatile) {
5764 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
5765 __ movsd(temp, Address(base, offset));
5766 codegen_->MaybeRecordImplicitNullCheck(instruction);
5767 __ movd(out.AsRegisterPairLow<Register>(), temp);
5768 __ psrlq(temp, Immediate(32));
5769 __ movd(out.AsRegisterPairHigh<Register>(), temp);
5770 } else {
5771 DCHECK_NE(base, out.AsRegisterPairLow<Register>());
5772 __ movl(out.AsRegisterPairLow<Register>(), Address(base, offset));
5773 codegen_->MaybeRecordImplicitNullCheck(instruction);
5774 __ movl(out.AsRegisterPairHigh<Register>(), Address(base, kX86WordSize + offset));
5775 }
5776 break;
5777 }
5778
5779 case DataType::Type::kFloat32: {
5780 __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
5781 break;
5782 }
5783
5784 case DataType::Type::kFloat64: {
5785 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
5786 break;
5787 }
5788
5789 case DataType::Type::kUint32:
5790 case DataType::Type::kUint64:
5791 case DataType::Type::kVoid:
5792 LOG(FATAL) << "Unreachable type " << load_type;
5793 UNREACHABLE();
5794 }
5795
5796 if (load_type == DataType::Type::kReference || load_type == DataType::Type::kInt64) {
5797 // Potential implicit null checks, in the case of reference or
5798 // long fields, are handled in the previous switch statement.
5799 } else {
5800 codegen_->MaybeRecordImplicitNullCheck(instruction);
5801 }
5802
5803 if (is_volatile) {
5804 if (load_type == DataType::Type::kReference) {
5805 // Memory barriers, in the case of references, are also handled
5806 // in the previous switch statement.
5807 } else {
5808 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5809 }
5810 }
5811 }
5812
5813 void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) {
5814 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5815
5816 LocationSummary* locations =
5817 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5818 locations->SetInAt(0, Location::RequiresRegister());
5819 bool is_volatile = field_info.IsVolatile();
5820 DataType::Type field_type = field_info.GetFieldType();
5821 bool is_byte_type = DataType::Size(field_type) == 1u;
5822
5823 // The register allocator does not support multiple
5824 // inputs that die at entry with one in a specific register.
5825 if (is_byte_type) {
5826 // Ensure the value is in a byte register.
5827 locations->SetInAt(1, Location::RegisterLocation(EAX));
5828 } else if (DataType::IsFloatingPointType(field_type)) {
5829 if (is_volatile && field_type == DataType::Type::kFloat64) {
5830 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5831 locations->SetInAt(1, Location::RequiresFpuRegister());
5832 } else {
5833 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
5834 }
5835 } else if (is_volatile && field_type == DataType::Type::kInt64) {
5836 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5837 locations->SetInAt(1, Location::RequiresRegister());
5838
5839 // A 64-bit value can be written atomically to an address with movsd from an XMM register.
5840 // We need two XMM registers because there's no easier way to (bit) copy a register pair
5841 // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
5842 // NB: We could make the register allocator understand fp_reg <-> core_reg moves but given the
5843 // isolated cases when we need this it isn't worth adding the extra complexity.
5844 locations->AddTemp(Location::RequiresFpuRegister());
5845 locations->AddTemp(Location::RequiresFpuRegister());
5846 } else {
5847 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5848
5849 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
5850 // Temporary registers for the write barrier.
5851 locations->AddTemp(Location::RequiresRegister()); // May be used for reference poisoning too.
5852 // Ensure the card is in a byte register.
5853 locations->AddTemp(Location::RegisterLocation(ECX));
5854 }
5855 }
5856 }
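
// --- Editor's sketch (not part of the original file) ---
// The movd/movd/punpckldq/movsd interleave described above, expressed with
// SSE2 intrinsics: the two 32-bit halves are packed into a single XMM
// register so that the 8-byte store happens as one instruction.
#include <emmintrin.h>
#include <cstdint>

static inline void AtomicStore64Sketch(void* addr, uint32_t lo, uint32_t hi) {
  __m128i xlo = _mm_cvtsi32_si128(static_cast<int32_t>(lo));  // movd
  __m128i xhi = _mm_cvtsi32_si128(static_cast<int32_t>(hi));  // movd
  __m128i v = _mm_unpacklo_epi32(xlo, xhi);                   // punpckldq
  _mm_storel_epi64(static_cast<__m128i*>(addr), v);           // one 8-byte store
}
// --------------------------------------------------------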
5857
5858 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
5859 uint32_t value_index,
5860 DataType::Type field_type,
5861 Address field_addr,
5862 Register base,
5863 bool is_volatile,
5864 bool value_can_be_null) {
5865 LocationSummary* locations = instruction->GetLocations();
5866 Location value = locations->InAt(value_index);
5867 bool needs_write_barrier =
5868 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(value_index));
5869
5870 if (is_volatile) {
5871 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5872 }
5873
5874 bool maybe_record_implicit_null_check_done = false;
5875
5876 switch (field_type) {
5877 case DataType::Type::kBool:
5878 case DataType::Type::kUint8:
5879 case DataType::Type::kInt8: {
5880 if (value.IsConstant()) {
5881 __ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5882 } else {
5883 __ movb(field_addr, value.AsRegister<ByteRegister>());
5884 }
5885 break;
5886 }
5887
5888 case DataType::Type::kUint16:
5889 case DataType::Type::kInt16: {
5890 if (value.IsConstant()) {
5891 __ movw(field_addr, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5892 } else {
5893 __ movw(field_addr, value.AsRegister<Register>());
5894 }
5895 break;
5896 }
5897
5898 case DataType::Type::kInt32:
5899 case DataType::Type::kReference: {
5900 if (kPoisonHeapReferences && needs_write_barrier) {
5901 // Note that in the case where `value` is a null reference,
5902 // we do not enter this block, as the reference does not
5903 // need poisoning.
5904 DCHECK_EQ(field_type, DataType::Type::kReference);
5905 Register temp = locations->GetTemp(0).AsRegister<Register>();
5906 __ movl(temp, value.AsRegister<Register>());
5907 __ PoisonHeapReference(temp);
5908 __ movl(field_addr, temp);
5909 } else if (value.IsConstant()) {
5910 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5911 __ movl(field_addr, Immediate(v));
5912 } else {
5913 DCHECK(value.IsRegister()) << value;
5914 __ movl(field_addr, value.AsRegister<Register>());
5915 }
5916 break;
5917 }
5918
5919 case DataType::Type::kInt64: {
5920 if (is_volatile) {
5921 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
5922 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
5923 __ movd(temp1, value.AsRegisterPairLow<Register>());
5924 __ movd(temp2, value.AsRegisterPairHigh<Register>());
5925 __ punpckldq(temp1, temp2);
5926 __ movsd(field_addr, temp1);
5927 codegen_->MaybeRecordImplicitNullCheck(instruction);
5928 } else if (value.IsConstant()) {
5929 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5930 __ movl(field_addr, Immediate(Low32Bits(v)));
5931 codegen_->MaybeRecordImplicitNullCheck(instruction);
5932 __ movl(field_addr.displaceBy(kX86WordSize), Immediate(High32Bits(v)));
5933 } else {
5934 __ movl(field_addr, value.AsRegisterPairLow<Register>());
5935 codegen_->MaybeRecordImplicitNullCheck(instruction);
5936 __ movl(field_addr.displaceBy(kX86WordSize), value.AsRegisterPairHigh<Register>());
5937 }
5938 maybe_record_implicit_null_check_done = true;
5939 break;
5940 }
5941
5942 case DataType::Type::kFloat32: {
5943 if (value.IsConstant()) {
5944 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5945 __ movl(field_addr, Immediate(v));
5946 } else {
5947 __ movss(field_addr, value.AsFpuRegister<XmmRegister>());
5948 }
5949 break;
5950 }
5951
5952 case DataType::Type::kFloat64: {
5953 if (value.IsConstant()) {
5954 DCHECK(!is_volatile);
5955 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5956 __ movl(field_addr, Immediate(Low32Bits(v)));
5957 codegen_->MaybeRecordImplicitNullCheck(instruction);
5958 __ movl(field_addr.displaceBy(kX86WordSize), Immediate(High32Bits(v)));
5959 maybe_record_implicit_null_check_done = true;
5960 } else {
5961 __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
5962 }
5963 break;
5964 }
5965
5966 case DataType::Type::kUint32:
5967 case DataType::Type::kUint64:
5968 case DataType::Type::kVoid:
5969 LOG(FATAL) << "Unreachable type " << field_type;
5970 UNREACHABLE();
5971 }
5972
5973 if (!maybe_record_implicit_null_check_done) {
5974 codegen_->MaybeRecordImplicitNullCheck(instruction);
5975 }
5976
5977 if (needs_write_barrier) {
5978 Register temp = locations->GetTemp(0).AsRegister<Register>();
5979 Register card = locations->GetTemp(1).AsRegister<Register>();
5980 codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>(), value_can_be_null);
5981 }
5982
5983 if (is_volatile) {
5984 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5985 }
5986 }
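
// --- Editor's sketch (not part of the original file) ---
// The barrier pairing above (kAnyStore before the store, kAnyAny after)
// implements Java volatile-store semantics; on x86 only the trailing
// StoreLoad barrier typically has a runtime cost, since plain x86 stores
// already have release semantics. A seq_cst atomic store models the same
// contract in portable C++.
#include <atomic>
#include <cstdint>

static inline void VolatileStore64Sketch(std::atomic<int64_t>* field, int64_t value) {
  field->store(value, std::memory_order_seq_cst);
}
// --------------------------------------------------------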
5987
5988 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
5989 const FieldInfo& field_info,
5990 bool value_can_be_null) {
5991 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5992
5993 LocationSummary* locations = instruction->GetLocations();
5994 Register base = locations->InAt(0).AsRegister<Register>();
5995 bool is_volatile = field_info.IsVolatile();
5996 DataType::Type field_type = field_info.GetFieldType();
5997 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5998 bool is_predicated =
5999 instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
6000
6001 Address field_addr(base, offset);
6002
6003 NearLabel pred_is_null;
6004 if (is_predicated) {
6005 __ testl(base, base);
6006 __ j(kEqual, &pred_is_null);
6007 }
6008
6009 HandleFieldSet(instruction,
6010 /* value_index= */ 1,
6011 field_type,
6012 field_addr,
6013 base,
6014 is_volatile,
6015 value_can_be_null);
6016
6017 if (is_predicated) {
6018 __ Bind(&pred_is_null);
6019 }
6020 }
6021
6022 void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6023 HandleFieldGet(instruction, instruction->GetFieldInfo());
6024 }
6025
6026 void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6027 HandleFieldGet(instruction, instruction->GetFieldInfo());
6028 }
6029
6030 void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6031 HandleFieldSet(instruction, instruction->GetFieldInfo());
6032 }
6033
6034 void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6035 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
6036 }
6037
6038 void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6039 HandleFieldSet(instruction, instruction->GetFieldInfo());
6040 }
6041
6042 void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6043 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
6044 }
6045
6046 void LocationsBuilderX86::VisitPredicatedInstanceFieldGet(
6047 HPredicatedInstanceFieldGet* instruction) {
6048 HandleFieldGet(instruction, instruction->GetFieldInfo());
6049 }
6050
6051 void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6052 HandleFieldGet(instruction, instruction->GetFieldInfo());
6053 }
6054
6055 void InstructionCodeGeneratorX86::VisitPredicatedInstanceFieldGet(
6056 HPredicatedInstanceFieldGet* instruction) {
6057 NearLabel finish;
6058 LocationSummary* locations = instruction->GetLocations();
6059 Register recv = locations->InAt(1).AsRegister<Register>();
6060 __ testl(recv, recv);
6061 __ j(kZero, &finish);
6062 HandleFieldGet(instruction, instruction->GetFieldInfo());
6063 __ Bind(&finish);
6064 }
6065 void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6066 HandleFieldGet(instruction, instruction->GetFieldInfo());
6067 }
6068
6069 void LocationsBuilderX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6070 codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(EAX));
6071 }
6072
6073 void InstructionCodeGeneratorX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6074 __ movl(EAX, Immediate(instruction->GetFormat()->GetValue()));
6075 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
6076 }
6077
6078 void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet(
6079 HUnresolvedInstanceFieldGet* instruction) {
6080 FieldAccessCallingConventionX86 calling_convention;
6081 codegen_->CreateUnresolvedFieldLocationSummary(
6082 instruction, instruction->GetFieldType(), calling_convention);
6083 }
6084
6085 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldGet(
6086 HUnresolvedInstanceFieldGet* instruction) {
6087 FieldAccessCallingConventionX86 calling_convention;
6088 codegen_->GenerateUnresolvedFieldAccess(instruction,
6089 instruction->GetFieldType(),
6090 instruction->GetFieldIndex(),
6091 instruction->GetDexPc(),
6092 calling_convention);
6093 }
6094
6095 void LocationsBuilderX86::VisitUnresolvedInstanceFieldSet(
6096 HUnresolvedInstanceFieldSet* instruction) {
6097 FieldAccessCallingConventionX86 calling_convention;
6098 codegen_->CreateUnresolvedFieldLocationSummary(
6099 instruction, instruction->GetFieldType(), calling_convention);
6100 }
6101
6102 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldSet(
6103 HUnresolvedInstanceFieldSet* instruction) {
6104 FieldAccessCallingConventionX86 calling_convention;
6105 codegen_->GenerateUnresolvedFieldAccess(instruction,
6106 instruction->GetFieldType(),
6107 instruction->GetFieldIndex(),
6108 instruction->GetDexPc(),
6109 calling_convention);
6110 }
6111
6112 void LocationsBuilderX86::VisitUnresolvedStaticFieldGet(
6113 HUnresolvedStaticFieldGet* instruction) {
6114 FieldAccessCallingConventionX86 calling_convention;
6115 codegen_->CreateUnresolvedFieldLocationSummary(
6116 instruction, instruction->GetFieldType(), calling_convention);
6117 }
6118
6119 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldGet(
6120 HUnresolvedStaticFieldGet* instruction) {
6121 FieldAccessCallingConventionX86 calling_convention;
6122 codegen_->GenerateUnresolvedFieldAccess(instruction,
6123 instruction->GetFieldType(),
6124 instruction->GetFieldIndex(),
6125 instruction->GetDexPc(),
6126 calling_convention);
6127 }
6128
6129 void LocationsBuilderX86::VisitUnresolvedStaticFieldSet(
6130 HUnresolvedStaticFieldSet* instruction) {
6131 FieldAccessCallingConventionX86 calling_convention;
6132 codegen_->CreateUnresolvedFieldLocationSummary(
6133 instruction, instruction->GetFieldType(), calling_convention);
6134 }
6135
6136 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet(
6137 HUnresolvedStaticFieldSet* instruction) {
6138 FieldAccessCallingConventionX86 calling_convention;
6139 codegen_->GenerateUnresolvedFieldAccess(instruction,
6140 instruction->GetFieldType(),
6141 instruction->GetFieldIndex(),
6142 instruction->GetDexPc(),
6143 calling_convention);
6144 }
6145
6146 void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
6147 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
6148 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
6149 ? Location::RequiresRegister()
6150 : Location::Any();
6151 locations->SetInAt(0, loc);
6152 }
6153
6154 void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) {
6155 if (CanMoveNullCheckToUser(instruction)) {
6156 return;
6157 }
6158 LocationSummary* locations = instruction->GetLocations();
6159 Location obj = locations->InAt(0);
6160
6161 __ testl(EAX, Address(obj.AsRegister<Register>(), 0));
6162 RecordPcInfo(instruction, instruction->GetDexPc());
6163 }
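
// --- Editor's sketch (not part of the original file) ---
// Control-flow idea behind the faulting "testl EAX, [obj]" above: a null base
// makes the load trap, and the runtime's SIGSEGV handler uses the PC recorded
// by RecordPcInfo to raise a NullPointerException instead of crashing. The
// POSIX demo below is a toy (longjmp-ing out of a SIGSEGV handler is not
// strictly portable) and is not how ART's fault manager actually works.
#include <signal.h>
#include <setjmp.h>

static sigjmp_buf g_npe_env;
static void NpeDemoHandler(int) { siglongjmp(g_npe_env, 1); }

static bool ImplicitNullCheckDemo(const int* obj) {
  struct sigaction sa = {};
  sa.sa_handler = NpeDemoHandler;
  sigaction(SIGSEGV, &sa, nullptr);
  if (sigsetjmp(g_npe_env, 1) == 0) {
    volatile int probe = *obj;  // the "testl" analogue: a cheap faulting read
    (void)probe;
    return true;   // obj was non-null, execution falls through
  }
  return false;    // faulted: this is where an NPE would be thrown
}
// --------------------------------------------------------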
6164
6165 void CodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) {
6166 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86(instruction);
6167 AddSlowPath(slow_path);
6168
6169 LocationSummary* locations = instruction->GetLocations();
6170 Location obj = locations->InAt(0);
6171
6172 if (obj.IsRegister()) {
6173 __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>());
6174 } else if (obj.IsStackSlot()) {
6175 __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
6176 } else {
6177 DCHECK(obj.IsConstant()) << obj;
6178 DCHECK(obj.GetConstant()->IsNullConstant());
6179 __ jmp(slow_path->GetEntryLabel());
6180 return;
6181 }
6182 __ j(kEqual, slow_path->GetEntryLabel());
6183 }
6184
6185 void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
6186 codegen_->GenerateNullCheck(instruction);
6187 }
6188
6189 void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
6190 bool object_array_get_with_read_barrier =
6191 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
6192 LocationSummary* locations =
6193 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6194 object_array_get_with_read_barrier
6195 ? LocationSummary::kCallOnSlowPath
6196 : LocationSummary::kNoCall);
6197 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6198 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6199 }
6200 locations->SetInAt(0, Location::RequiresRegister());
6201 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6202 if (DataType::IsFloatingPointType(instruction->GetType())) {
6203 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6204 } else {
6205 // The output overlaps in case of long: we don't want the low move
6206 // to overwrite the array's location. Likewise, in the case of an
6207 // object array get with read barriers enabled, we do not want the
6208 // move to overwrite the array's location, as we need it to emit
6209 // the read barrier.
6210 locations->SetOut(
6211 Location::RequiresRegister(),
6212 (instruction->GetType() == DataType::Type::kInt64 || object_array_get_with_read_barrier)
6213 ? Location::kOutputOverlap
6214 : Location::kNoOutputOverlap);
6215 }
6216 }
6217
6218 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
6219 LocationSummary* locations = instruction->GetLocations();
6220 Location obj_loc = locations->InAt(0);
6221 Register obj = obj_loc.AsRegister<Register>();
6222 Location index = locations->InAt(1);
6223 Location out_loc = locations->Out();
6224 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
6225
6226 DataType::Type type = instruction->GetType();
6227 switch (type) {
6228 case DataType::Type::kBool:
6229 case DataType::Type::kUint8: {
6230 Register out = out_loc.AsRegister<Register>();
6231 __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
6232 break;
6233 }
6234
6235 case DataType::Type::kInt8: {
6236 Register out = out_loc.AsRegister<Register>();
6237 __ movsxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
6238 break;
6239 }
6240
6241 case DataType::Type::kUint16: {
6242 Register out = out_loc.AsRegister<Register>();
6243 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6244 // Branch into the compressed or uncompressed case based on the string's compression flag.
6245 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
6246 NearLabel done, not_compressed;
6247 __ testb(Address(obj, count_offset), Immediate(1));
6248 codegen_->MaybeRecordImplicitNullCheck(instruction);
6249 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6250 "Expecting 0=compressed, 1=uncompressed");
6251 __ j(kNotZero, &not_compressed);
6252 __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
6253 __ jmp(&done);
6254 __ Bind(&not_compressed);
6255 __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
6256 __ Bind(&done);
6257 } else {
6258 // Common case: an array of char, or charAt when the string
6259 // compression feature is turned off.
6260 __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
6261 }
6262 break;
6263 }
6264
6265 case DataType::Type::kInt16: {
6266 Register out = out_loc.AsRegister<Register>();
6267 __ movsxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
6268 break;
6269 }
6270
6271 case DataType::Type::kInt32: {
6272 Register out = out_loc.AsRegister<Register>();
6273 __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
6274 break;
6275 }
6276
6277 case DataType::Type::kReference: {
6278 static_assert(
6279 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6280 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6281 // /* HeapReference<Object> */ out =
6282 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
6283 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
6284 // Note that a potential implicit null check is handled in this
6285 // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
6286 codegen_->GenerateArrayLoadWithBakerReadBarrier(
6287 instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
6288 } else {
6289 Register out = out_loc.AsRegister<Register>();
6290 __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
6291 codegen_->MaybeRecordImplicitNullCheck(instruction);
6292 // If read barriers are enabled, emit read barriers other than
6293 // Baker's using a slow path (and also unpoison the loaded
6294 // reference, if heap poisoning is enabled).
6295 if (index.IsConstant()) {
6296 uint32_t offset =
6297 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
6298 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
6299 } else {
6300 codegen_->MaybeGenerateReadBarrierSlow(
6301 instruction, out_loc, out_loc, obj_loc, data_offset, index);
6302 }
6303 }
6304 break;
6305 }
6306
6307 case DataType::Type::kInt64: {
6308 DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>());
6309 __ movl(out_loc.AsRegisterPairLow<Register>(),
6310 CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
6311 codegen_->MaybeRecordImplicitNullCheck(instruction);
6312 __ movl(out_loc.AsRegisterPairHigh<Register>(),
6313 CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset + kX86WordSize));
6314 break;
6315 }
6316
6317 case DataType::Type::kFloat32: {
6318 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
6319 __ movss(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
6320 break;
6321 }
6322
6323 case DataType::Type::kFloat64: {
6324 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
6325 __ movsd(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
6326 break;
6327 }
6328
6329 case DataType::Type::kUint32:
6330 case DataType::Type::kUint64:
6331 case DataType::Type::kVoid:
6332 LOG(FATAL) << "Unreachable type " << type;
6333 UNREACHABLE();
6334 }
6335
6336 if (type == DataType::Type::kReference || type == DataType::Type::kInt64) {
6337 // Potential implicit null checks, in the case of reference or
6338 // long arrays, are handled in the previous switch statement.
6339 } else {
6340 codegen_->MaybeRecordImplicitNullCheck(instruction);
6341 }
6342 }
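
// --- Editor's sketch (not part of the original file) ---
// The count-field encoding tested in the kUint16 case above: bit 0
// distinguishes 8-bit (compressed) from 16-bit character storage. The
// parameter names are hypothetical stand-ins for mirror::String internals.
#include <cstdint>

static inline uint16_t StringCharAtSketch(uint32_t count_field,
                                          const uint8_t* data8,
                                          const uint16_t* data16,
                                          uint32_t index) {
  bool compressed = (count_field & 1u) == 0u;  // 0 = compressed, 1 = uncompressed
  return compressed ? data8[index] : data16[index];
}
// --------------------------------------------------------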
6343
6344 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
6345 DataType::Type value_type = instruction->GetComponentType();
6346
6347 bool needs_write_barrier =
6348 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
6349 bool needs_type_check = instruction->NeedsTypeCheck();
6350
6351 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6352 instruction,
6353 needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
6354
6355 bool is_byte_type = DataType::Size(value_type) == 1u;
6356 // We need the inputs to be different from the output in the case of a long operation.
6357 // In case of a byte operation, the register allocator does not support multiple
6358 // inputs that die at entry with one in a specific register.
6359 locations->SetInAt(0, Location::RequiresRegister());
6360 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6361 if (is_byte_type) {
6362 // Ensure the value is in a byte register.
6363 locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
6364 } else if (DataType::IsFloatingPointType(value_type)) {
6365 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
6366 } else {
6367 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
6368 }
6369 if (needs_write_barrier) {
6370 // Temporary registers for the write barrier.
6371 locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
6372 // Ensure the card is in a byte register.
6373 locations->AddTemp(Location::RegisterLocation(ECX));
6374 }
6375 }
6376
6377 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
6378 LocationSummary* locations = instruction->GetLocations();
6379 Location array_loc = locations->InAt(0);
6380 Register array = array_loc.AsRegister<Register>();
6381 Location index = locations->InAt(1);
6382 Location value = locations->InAt(2);
6383 DataType::Type value_type = instruction->GetComponentType();
6384 bool needs_type_check = instruction->NeedsTypeCheck();
6385 bool needs_write_barrier =
6386 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
6387
6388 switch (value_type) {
6389 case DataType::Type::kBool:
6390 case DataType::Type::kUint8:
6391 case DataType::Type::kInt8: {
6392 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
6393 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset);
6394 if (value.IsRegister()) {
6395 __ movb(address, value.AsRegister<ByteRegister>());
6396 } else {
6397 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
6398 }
6399 codegen_->MaybeRecordImplicitNullCheck(instruction);
6400 break;
6401 }
6402
6403 case DataType::Type::kUint16:
6404 case DataType::Type::kInt16: {
6405 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
6406 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset);
6407 if (value.IsRegister()) {
6408 __ movw(address, value.AsRegister<Register>());
6409 } else {
6410 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
6411 }
6412 codegen_->MaybeRecordImplicitNullCheck(instruction);
6413 break;
6414 }
6415
6416 case DataType::Type::kReference: {
6417 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6418 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6419
6420 if (!value.IsRegister()) {
6421 // Just setting null.
6422 DCHECK(instruction->InputAt(2)->IsNullConstant());
6423 DCHECK(value.IsConstant()) << value;
6424 __ movl(address, Immediate(0));
6425 codegen_->MaybeRecordImplicitNullCheck(instruction);
6426 DCHECK(!needs_write_barrier);
6427 DCHECK(!needs_type_check);
6428 break;
6429 }
6430
6431 DCHECK(needs_write_barrier);
6432 Register register_value = value.AsRegister<Register>();
6433 Location temp_loc = locations->GetTemp(0);
6434 Register temp = temp_loc.AsRegister<Register>();
6435
6436 bool can_value_be_null = instruction->GetValueCanBeNull();
6437 NearLabel do_store;
6438 if (can_value_be_null) {
6439 __ testl(register_value, register_value);
6440 __ j(kEqual, &do_store);
6441 }
6442
6443 SlowPathCode* slow_path = nullptr;
6444 if (needs_type_check) {
6445 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86(instruction);
6446 codegen_->AddSlowPath(slow_path);
6447
6448 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6449 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6450 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6451
6452 // Note that when Baker read barriers are enabled, the type
6453 // checks are performed without read barriers. This is fine,
6454 // even in the case where a class object is in the from-space
6455 // after the flip, as a comparison involving such a type would
6456 // not produce a false positive; it may of course produce a
6457 // false negative, in which case we would take the ArraySet
6458 // slow path.
6459
6460 // /* HeapReference<Class> */ temp = array->klass_
6461 __ movl(temp, Address(array, class_offset));
6462 codegen_->MaybeRecordImplicitNullCheck(instruction);
6463 __ MaybeUnpoisonHeapReference(temp);
6464
6465 // /* HeapReference<Class> */ temp = temp->component_type_
6466 __ movl(temp, Address(temp, component_offset));
6467 // If heap poisoning is enabled, no need to unpoison `temp`
6468 // nor the object reference in `register_value->klass`, as
6469 // we are comparing two poisoned references.
6470 __ cmpl(temp, Address(register_value, class_offset));
6471
6472 if (instruction->StaticTypeOfArrayIsObjectArray()) {
6473 NearLabel do_put;
6474 __ j(kEqual, &do_put);
6475 // If heap poisoning is enabled, the `temp` reference has
6476 // not been unpoisoned yet; unpoison it now.
6477 __ MaybeUnpoisonHeapReference(temp);
6478
6479 // If heap poisoning is enabled, no need to unpoison the
6480 // heap reference loaded below, as it is only used for a
6481 // comparison with null.
6482 __ cmpl(Address(temp, super_offset), Immediate(0));
6483 __ j(kNotEqual, slow_path->GetEntryLabel());
6484 __ Bind(&do_put);
6485 } else {
6486 __ j(kNotEqual, slow_path->GetEntryLabel());
6487 }
6488 }
6489
6490 Register card = locations->GetTemp(1).AsRegister<Register>();
6491 codegen_->MarkGCCard(
6492 temp, card, array, value.AsRegister<Register>(), /* value_can_be_null= */ false);
6493
6494 if (can_value_be_null) {
6495 DCHECK(do_store.IsLinked());
6496 __ Bind(&do_store);
6497 }
6498
6499 Register source = register_value;
6500 if (kPoisonHeapReferences) {
6501 __ movl(temp, register_value);
6502 __ PoisonHeapReference(temp);
6503 source = temp;
6504 }
6505
6506 __ movl(address, source);
6507
6508 if (can_value_be_null || !needs_type_check) {
6509 codegen_->MaybeRecordImplicitNullCheck(instruction);
6510 }
6511
6512 if (slow_path != nullptr) {
6513 __ Bind(slow_path->GetExitLabel());
6514 }
6515
6516 break;
6517 }
6518
6519 case DataType::Type::kInt32: {
6520 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6521 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6522 if (value.IsRegister()) {
6523 __ movl(address, value.AsRegister<Register>());
6524 } else {
6525 DCHECK(value.IsConstant()) << value;
6526 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6527 __ movl(address, Immediate(v));
6528 }
6529 codegen_->MaybeRecordImplicitNullCheck(instruction);
6530 break;
6531 }
6532
6533 case DataType::Type::kInt64: {
6534 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
6535 if (value.IsRegisterPair()) {
6536 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6537 value.AsRegisterPairLow<Register>());
6538 codegen_->MaybeRecordImplicitNullCheck(instruction);
6539 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6540 value.AsRegisterPairHigh<Register>());
6541 } else {
6542 DCHECK(value.IsConstant());
6543 int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
6544 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6545 Immediate(Low32Bits(val)));
6546 codegen_->MaybeRecordImplicitNullCheck(instruction);
6547 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6548 Immediate(High32Bits(val)));
6549 }
6550 break;
6551 }
6552
6553 case DataType::Type::kFloat32: {
6554 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
6555 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6556 if (value.IsFpuRegister()) {
6557 __ movss(address, value.AsFpuRegister<XmmRegister>());
6558 } else {
6559 DCHECK(value.IsConstant());
6560 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
6561 __ movl(address, Immediate(v));
6562 }
6563 codegen_->MaybeRecordImplicitNullCheck(instruction);
6564 break;
6565 }
6566
6567 case DataType::Type::kFloat64: {
6568 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
6569 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset);
6570 if (value.IsFpuRegister()) {
6571 __ movsd(address, value.AsFpuRegister<XmmRegister>());
6572 } else {
6573 DCHECK(value.IsConstant());
6574 Address address_hi =
6575 CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset + kX86WordSize);
6576 int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
6577 __ movl(address, Immediate(Low32Bits(v)));
6578 codegen_->MaybeRecordImplicitNullCheck(instruction);
6579 __ movl(address_hi, Immediate(High32Bits(v)));
6580 }
6581 break;
6582 }
6583
6584 case DataType::Type::kUint32:
6585 case DataType::Type::kUint64:
6586 case DataType::Type::kVoid:
6587 LOG(FATAL) << "Unreachable type " << instruction->GetType();
6588 UNREACHABLE();
6589 }
6590 }
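
// --- Editor's sketch (not part of the original file) ---
// The inline type-check fast path above, modeled in C++: a store may proceed
// without the slow path iff the value's class equals the array's component
// type, or the array is statically Object[] and the component type has no
// superclass (i.e. it is java.lang.Object). Everything else falls into
// ArraySetSlowPathX86 for the full subtype check. `KlassSketch` is a
// hypothetical stand-in for mirror::Class.
struct KlassSketch {
  const KlassSketch* component_type;
  const KlassSketch* super_class;
};

static inline bool ArrayStoreFastPathSketch(const KlassSketch* array_klass,
                                            const KlassSketch* value_klass,
                                            bool static_type_is_object_array) {
  const KlassSketch* component = array_klass->component_type;
  if (value_klass == component) {
    return true;  // do_put
  }
  return static_type_is_object_array && component->super_class == nullptr;
}
// --------------------------------------------------------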
6591
6592 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
6593 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6594 locations->SetInAt(0, Location::RequiresRegister());
6595 if (!instruction->IsEmittedAtUseSite()) {
6596 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6597 }
6598 }
6599
6600 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
6601 if (instruction->IsEmittedAtUseSite()) {
6602 return;
6603 }
6604
6605 LocationSummary* locations = instruction->GetLocations();
6606 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6607 Register obj = locations->InAt(0).AsRegister<Register>();
6608 Register out = locations->Out().AsRegister<Register>();
6609 __ movl(out, Address(obj, offset));
6610 codegen_->MaybeRecordImplicitNullCheck(instruction);
6611 // Mask out most significant bit in case the array is String's array of char.
6612 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6613 __ shrl(out, Immediate(1));
6614 }
6615 }
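
// --- Editor's sketch (not part of the original file) ---
// Why the shrl above suffices: with string compression enabled, the count
// field stores (length << 1) | compression_flag, so a logical right shift by
// one recovers the character count whether or not the string is compressed.
#include <cstdint>

static inline int32_t StringLengthSketch(uint32_t count_field) {
  return static_cast<int32_t>(count_field >> 1);
}
// --------------------------------------------------------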
6616
6617 void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6618 RegisterSet caller_saves = RegisterSet::Empty();
6619 InvokeRuntimeCallingConvention calling_convention;
6620 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6621 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
6622 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6623 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
6624 HInstruction* length = instruction->InputAt(1);
6625 if (!length->IsEmittedAtUseSite()) {
6626 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6627 }
6628 // Need a temporary register to read the array's length.
6629 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6630 locations->AddTemp(Location::RequiresRegister());
6631 }
6632 }
6633
6634 void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6635 const bool is_string_compressed_char_at =
6636 mirror::kUseStringCompression && instruction->IsStringCharAt();
6637 LocationSummary* locations = instruction->GetLocations();
6638 Location index_loc = locations->InAt(0);
6639 Location length_loc = locations->InAt(1);
6640 SlowPathCode* slow_path =
6641 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86(instruction);
6642
6643 if (length_loc.IsConstant()) {
6644 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
6645 if (index_loc.IsConstant()) {
6646 // BCE will remove the bounds check if we are guaranteed to pass.
6647 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6648 if (index < 0 || index >= length) {
6649 codegen_->AddSlowPath(slow_path);
6650 __ jmp(slow_path->GetEntryLabel());
6651 } else {
6652 // Some optimization after BCE may have generated this, and we should not
6653 // generate a bounds check if it is a valid range.
6654 }
6655 return;
6656 }
6657
6658 // We have to reverse the jump condition because the length is the constant.
6659 Register index_reg = index_loc.AsRegister<Register>();
6660 __ cmpl(index_reg, Immediate(length));
6661 codegen_->AddSlowPath(slow_path);
6662 __ j(kAboveEqual, slow_path->GetEntryLabel());
6663 } else {
6664 HInstruction* array_length = instruction->InputAt(1);
6665 if (array_length->IsEmittedAtUseSite()) {
6666 // Address the length field in the array.
6667 DCHECK(array_length->IsArrayLength());
6668 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
6669 Location array_loc = array_length->GetLocations()->InAt(0);
6670 Address array_len(array_loc.AsRegister<Register>(), len_offset);
6671 if (is_string_compressed_char_at) {
6672 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
6673 // the string compression flag) with the in-memory length and avoid the temporary.
6674 Register length_reg = locations->GetTemp(0).AsRegister<Register>();
6675 __ movl(length_reg, array_len);
6676 codegen_->MaybeRecordImplicitNullCheck(array_length);
6677 __ shrl(length_reg, Immediate(1));
6678 codegen_->GenerateIntCompare(length_reg, index_loc);
6679 } else {
6680 // Checking bounds for general case:
6681 // Array of char or string's array with feature compression off.
6682 if (index_loc.IsConstant()) {
6683 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6684 __ cmpl(array_len, Immediate(value));
6685 } else {
6686 __ cmpl(array_len, index_loc.AsRegister<Register>());
6687 }
6688 codegen_->MaybeRecordImplicitNullCheck(array_length);
6689 }
6690 } else {
6691 codegen_->GenerateIntCompare(length_loc, index_loc);
6692 }
6693 codegen_->AddSlowPath(slow_path);
6694 __ j(kBelowEqual, slow_path->GetEntryLabel());
6695 }
6696 }
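
// --- Editor's sketch (not part of the original file) ---
// The single unsigned comparison used above (j kAboveEqual / kBelowEqual)
// covers both failure modes at once: a negative index reinterpreted as
// unsigned becomes a huge value, so one compare rejects "index < 0" and
// "index >= length" together.
#include <cstdint>

static inline bool IndexOutOfBoundsSketch(int32_t index, int32_t length) {
  return static_cast<uint32_t>(index) >= static_cast<uint32_t>(length);
}
// --------------------------------------------------------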
6697
6698 void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
6699 LOG(FATAL) << "Unreachable";
6700 }
6701
6702 void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) {
6703 if (instruction->GetNext()->IsSuspendCheck() &&
6704 instruction->GetBlock()->GetLoopInformation() != nullptr) {
6705 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6706 // The back edge will generate the suspend check.
6707 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6708 }
6709
6710 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6711 }
6712
6713 void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6714 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6715 instruction, LocationSummary::kCallOnSlowPath);
6716 // In the suspend check slow path, usually there are no caller-save registers at all.
6717 // If SIMD instructions are present, however, we force spilling all live SIMD
6718 // registers in full width (since the runtime only saves/restores the lower part).
6719 locations->SetCustomSlowPathCallerSaves(
6720 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6721 }
6722
6723 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6724 HBasicBlock* block = instruction->GetBlock();
6725 if (block->GetLoopInformation() != nullptr) {
6726 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6727 // The back edge will generate the suspend check.
6728 return;
6729 }
6730 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6731 // The goto will generate the suspend check.
6732 return;
6733 }
6734 GenerateSuspendCheck(instruction, nullptr);
6735 }
6736
6737 void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
6738 HBasicBlock* successor) {
6739 SuspendCheckSlowPathX86* slow_path =
6740 down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
6741 if (slow_path == nullptr) {
6742 slow_path =
6743 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86(instruction, successor);
6744 instruction->SetSlowPath(slow_path);
6745 codegen_->AddSlowPath(slow_path);
6746 if (successor != nullptr) {
6747 DCHECK(successor->IsLoopHeader());
6748 }
6749 } else {
6750 DCHECK_EQ(slow_path->GetSuccessor(), successor);
6751 }
6752
6753 __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
6754 Immediate(0));
6755 if (successor == nullptr) {
6756 __ j(kNotEqual, slow_path->GetEntryLabel());
6757 __ Bind(slow_path->GetReturnLabel());
6758 } else {
6759 __ j(kEqual, codegen_->GetLabelOf(successor));
6760 __ jmp(slow_path->GetEntryLabel());
6761 }
6762 }
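
// --- Editor's sketch (not part of the original file) ---
// The poll above in portable terms: the 16-bit thread-flags word lives at a
// fixed offset from the thread-local base (addressed via fs: on x86), and any
// nonzero flag diverts into SuspendCheckSlowPathX86, which calls into the
// runtime.
#include <atomic>
#include <cstdint>

static inline bool NeedsSuspendSketch(const std::atomic<uint16_t>& thread_flags) {
  return thread_flags.load(std::memory_order_relaxed) != 0;
}
// --------------------------------------------------------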
6763
6764 X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
6765 return codegen_->GetAssembler();
6766 }
6767
6768 void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src, int number_of_words) {
6769 ScratchRegisterScope ensure_scratch(
6770 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6771 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
6772 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6773
6774 // Now that a temp register is available (possibly after spilling), move blocks of memory.
6775 for (int i = 0; i < number_of_words; i++) {
6776 __ movl(temp_reg, Address(ESP, src + stack_offset));
6777 __ movl(Address(ESP, dst + stack_offset), temp_reg);
6778 stack_offset += kX86WordSize;
6779 }
6780 }
6781
6782 void ParallelMoveResolverX86::EmitMove(size_t index) {
6783 MoveOperands* move = moves_[index];
6784 Location source = move->GetSource();
6785 Location destination = move->GetDestination();
6786
6787 if (source.IsRegister()) {
6788 if (destination.IsRegister()) {
6789 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
6790 } else if (destination.IsFpuRegister()) {
6791 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
6792 } else {
6793 DCHECK(destination.IsStackSlot());
6794 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
6795 }
6796 } else if (source.IsRegisterPair()) {
6797 if (destination.IsRegisterPair()) {
6798 __ movl(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
6799 DCHECK_NE(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairHigh<Register>());
6800 __ movl(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
6801 } else if (destination.IsFpuRegister()) {
6802 size_t elem_size = DataType::Size(DataType::Type::kInt32);
6803 // Push the 2 source registers to the stack.
6804 __ pushl(source.AsRegisterPairHigh<Register>());
6805 __ cfi().AdjustCFAOffset(elem_size);
6806 __ pushl(source.AsRegisterPairLow<Register>());
6807 __ cfi().AdjustCFAOffset(elem_size);
6808 // Load the destination register.
6809 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
6810 // And remove the temporary stack space we allocated.
6811 codegen_->DecreaseFrame(2 * elem_size);
6812 } else {
6813 DCHECK(destination.IsDoubleStackSlot());
6814 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
6815 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
6816 source.AsRegisterPairHigh<Register>());
6817 }
6818 } else if (source.IsFpuRegister()) {
6819 if (destination.IsRegister()) {
6820 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
6821 } else if (destination.IsFpuRegister()) {
6822 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6823 } else if (destination.IsRegisterPair()) {
6824 size_t elem_size = DataType::Size(DataType::Type::kInt32);
6825 // Create stack space for 2 elements.
6826 codegen_->IncreaseFrame(2 * elem_size);
6827 // Store the source register.
6828 __ movsd(Address(ESP, 0), source.AsFpuRegister<XmmRegister>());
6829 // And pop the values into destination registers.
6830 __ popl(destination.AsRegisterPairLow<Register>());
6831 __ cfi().AdjustCFAOffset(-elem_size);
6832 __ popl(destination.AsRegisterPairHigh<Register>());
6833 __ cfi().AdjustCFAOffset(-elem_size);
6834 } else if (destination.IsStackSlot()) {
6835 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6836 } else if (destination.IsDoubleStackSlot()) {
6837 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6838 } else {
6839 DCHECK(destination.IsSIMDStackSlot());
6840 __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6841 }
6842 } else if (source.IsStackSlot()) {
6843 if (destination.IsRegister()) {
6844 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
6845 } else if (destination.IsFpuRegister()) {
6846 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6847 } else {
6848 DCHECK(destination.IsStackSlot());
6849 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
6850 }
6851 } else if (source.IsDoubleStackSlot()) {
6852 if (destination.IsRegisterPair()) {
6853 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
6854 __ movl(destination.AsRegisterPairHigh<Register>(),
6855 Address(ESP, source.GetHighStackIndex(kX86WordSize)));
6856 } else if (destination.IsFpuRegister()) {
6857 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6858 } else {
6859 DCHECK(destination.IsDoubleStackSlot()) << destination;
6860 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
6861 }
6862 } else if (source.IsSIMDStackSlot()) {
6863 if (destination.IsFpuRegister()) {
6864 __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6865 } else {
6866 DCHECK(destination.IsSIMDStackSlot());
6867 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
6868 }
6869 } else if (source.IsConstant()) {
6870 HConstant* constant = source.GetConstant();
6871 if (constant->IsIntConstant() || constant->IsNullConstant()) {
6872 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
6873 if (destination.IsRegister()) {
6874 if (value == 0) {
6875 __ xorl(destination.AsRegister<Register>(), destination.AsRegister<Register>());
6876 } else {
6877 __ movl(destination.AsRegister<Register>(), Immediate(value));
6878 }
6879 } else {
6880 DCHECK(destination.IsStackSlot()) << destination;
6881 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
6882 }
6883 } else if (constant->IsFloatConstant()) {
6884 float fp_value = constant->AsFloatConstant()->GetValue();
6885 int32_t value = bit_cast<int32_t, float>(fp_value);
6886 Immediate imm(value);
6887 if (destination.IsFpuRegister()) {
6888 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6889 if (value == 0) {
6890 // Easy handling of 0.0.
6891 __ xorps(dest, dest);
6892 } else {
6893 ScratchRegisterScope ensure_scratch(
6894 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6895 Register temp = static_cast<Register>(ensure_scratch.GetRegister());
6896 __ movl(temp, Immediate(value));
6897 __ movd(dest, temp);
6898 }
6899 } else {
6900 DCHECK(destination.IsStackSlot()) << destination;
6901 __ movl(Address(ESP, destination.GetStackIndex()), imm);
6902 }
6903 } else if (constant->IsLongConstant()) {
6904 int64_t value = constant->AsLongConstant()->GetValue();
6905 int32_t low_value = Low32Bits(value);
6906 int32_t high_value = High32Bits(value);
6907 Immediate low(low_value);
6908 Immediate high(high_value);
6909 if (destination.IsDoubleStackSlot()) {
6910 __ movl(Address(ESP, destination.GetStackIndex()), low);
6911 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
6912 } else {
6913 __ movl(destination.AsRegisterPairLow<Register>(), low);
6914 __ movl(destination.AsRegisterPairHigh<Register>(), high);
6915 }
6916 } else {
6917 DCHECK(constant->IsDoubleConstant());
6918 double dbl_value = constant->AsDoubleConstant()->GetValue();
6919 int64_t value = bit_cast<int64_t, double>(dbl_value);
6920 int32_t low_value = Low32Bits(value);
6921 int32_t high_value = High32Bits(value);
6922 Immediate low(low_value);
6923 Immediate high(high_value);
6924 if (destination.IsFpuRegister()) {
6925 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6926 if (value == 0) {
6927 // Easy handling of 0.0.
6928 __ xorpd(dest, dest);
6929 } else {
6930 __ pushl(high);
6931 __ cfi().AdjustCFAOffset(4);
6932 __ pushl(low);
6933 __ cfi().AdjustCFAOffset(4);
6934 __ movsd(dest, Address(ESP, 0));
6935 codegen_->DecreaseFrame(8);
6936 }
6937 } else {
6938 DCHECK(destination.IsDoubleStackSlot()) << destination;
6939 __ movl(Address(ESP, destination.GetStackIndex()), low);
6940 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
6941 }
6942 }
6943 } else {
6944 LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source;
6945 }
6946 }
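// Illustrative sketch of the double-constant path above, assuming XMM0 as the
// destination (registers are arbitrary here). For 1.0, whose bit pattern is
// 0x3FF0000000000000, the emitted sequence amounts to:
//   pushl $0x3FF00000     // high 32 bits; ESP -= 4
//   pushl $0x00000000     // low 32 bits; ESP -= 4
//   movsd (%esp), %xmm0   // reassemble the two words as one double
//   addl $8, %esp         // DecreaseFrame(8) reclaims the two slots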
6947
6948 void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
6949 Register suggested_scratch = reg == EAX ? EBX : EAX;
6950 ScratchRegisterScope ensure_scratch(
6951 this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
6952
6953 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6954 __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
6955 __ movl(Address(ESP, mem + stack_offset), reg);
6956 __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
6957 }
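// Illustrative expansion of Exchange(EAX, 16), assuming EBX is available as a
// scratch register and no spill is needed (stack_offset == 0):
//   movl 16(%esp), %ebx   // scratch = mem
//   movl %eax, 16(%esp)   // mem = reg
//   movl %ebx, %eax       // reg = old mem
// If the scratch register had to be spilled, the push moves ESP down by one
// word, which is what the kX86WordSize correction compensates for.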
6958
6959 void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
6960 ScratchRegisterScope ensure_scratch(
6961 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6962
6963 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
6964 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6965 __ movl(temp_reg, Address(ESP, mem + stack_offset));
6966 __ movss(Address(ESP, mem + stack_offset), reg);
6967 __ movd(reg, temp_reg);
6968 }
6969
6970 void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) {
6971 size_t extra_slot = 4 * kX86WordSize;
6972 codegen_->IncreaseFrame(extra_slot);
6973 __ movups(Address(ESP, 0), XmmRegister(reg));
6974 ExchangeMemory(0, mem + extra_slot, 4);
6975 __ movups(XmmRegister(reg), Address(ESP, 0));
6976 codegen_->DecreaseFrame(extra_slot);
6977 }
6978
6979 void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) {
6980 ScratchRegisterScope ensure_scratch1(
6981 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6982
6983 Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
6984 ScratchRegisterScope ensure_scratch2(
6985 this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
6986
6987 int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
6988 stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
6989
6990 // Now that temp registers are available (possibly spilled), exchange blocks of memory.
6991 for (int i = 0; i < number_of_words; i++) {
6992 __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
6993 __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
6994 __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
6995 __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
6996 stack_offset += kX86WordSize;
6997 }
6998 }
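// Illustrative expansion of ExchangeMemory(0, 8, 1), assuming EAX and EBX are
// free so neither scratch is spilled (stack_offset == 0):
//   movl 0(%esp), %eax
//   movl 8(%esp), %ebx
//   movl %eax, 8(%esp)
//   movl %ebx, 0(%esp)
// Every spilled scratch shifts ESP by one word, so both memory operands get
// the same correction before the copy loop runs.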
6999
7000 void ParallelMoveResolverX86::EmitSwap(size_t index) {
7001 MoveOperands* move = moves_[index];
7002 Location source = move->GetSource();
7003 Location destination = move->GetDestination();
7004
7005 if (source.IsRegister() && destination.IsRegister()) {
7006 // Use the XOR swap algorithm to avoid the serializing XCHG instruction or using a temporary.
7007 DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>());
7008 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
7009 __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>());
7010 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
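// Worked trace of the XOR swap with a = 3, b = 5:
//   a ^= b  ->  a = 6 (0b110)
//   b ^= a  ->  b = 3 (0b011)
//   a ^= b  ->  a = 5 (0b101)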
7011 } else if (source.IsRegister() && destination.IsStackSlot()) {
7012 Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
7013 } else if (source.IsStackSlot() && destination.IsRegister()) {
7014 Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
7015 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
7016 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
7017 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
7018 // Use the XOR swap algorithm to avoid a temporary.
7019 DCHECK_NE(source.reg(), destination.reg());
7020 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
7021 __ xorpd(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
7022 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
7023 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
7024 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
7025 } else if (destination.IsFpuRegister() && source.IsStackSlot()) {
7026 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
7027 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
7028 // Take advantage of the 16 bytes in the XMM register.
7029 XmmRegister reg = source.AsFpuRegister<XmmRegister>();
7030 Address stack(ESP, destination.GetStackIndex());
7031 // Load the double into the high doubleword.
7032 __ movhpd(reg, stack);
7033
7034 // Store the low double into the destination.
7035 __ movsd(stack, reg);
7036
7037 // Move the high double to the low double.
7038 __ psrldq(reg, Immediate(8));
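// Lane view of this three-instruction swap, writing L for the double that
// started in the XMM low lane and M for the double in memory:
//   after movhpd: xmm = [ low: L | high: M ]
//   after movsd : mem = L
//   after psrldq: xmm = [ low: M | high: 0 ]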
7039 } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) {
7040 // Take advantage of the 16 bytes in the XMM register.
7041 XmmRegister reg = destination.AsFpuRegister<XmmRegister>();
7042 Address stack(ESP, source.GetStackIndex());
7043 // Load the double into the high doubleword.
7044 __ movhpd(reg, stack);
7045
7046 // Store the low double into the destination.
7047 __ movsd(stack, reg);
7048
7049 // Move the high double to the low double.
7050 __ psrldq(reg, Immediate(8));
7051 } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) {
7052 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
7053 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
7054 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
7055 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
7056 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
7057 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
7058 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
7059 } else {
7060 LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination;
7061 }
7062 }
7063
7064 void ParallelMoveResolverX86::SpillScratch(int reg) {
7065 __ pushl(static_cast<Register>(reg));
7066 }
7067
7068 void ParallelMoveResolverX86::RestoreScratch(int reg) {
7069 __ popl(static_cast<Register>(reg));
7070 }
7071
7072 HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
7073 HLoadClass::LoadKind desired_class_load_kind) {
7074 switch (desired_class_load_kind) {
7075 case HLoadClass::LoadKind::kInvalid:
7076 LOG(FATAL) << "UNREACHABLE";
7077 UNREACHABLE();
7078 case HLoadClass::LoadKind::kReferrersClass:
7079 break;
7080 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
7081 case HLoadClass::LoadKind::kBootImageRelRo:
7082 case HLoadClass::LoadKind::kBssEntry:
7083 case HLoadClass::LoadKind::kBssEntryPublic:
7084 case HLoadClass::LoadKind::kBssEntryPackage:
7085 DCHECK(!GetCompilerOptions().IsJitCompiler());
7086 break;
7087 case HLoadClass::LoadKind::kJitBootImageAddress:
7088 case HLoadClass::LoadKind::kJitTableAddress:
7089 DCHECK(GetCompilerOptions().IsJitCompiler());
7090 break;
7091 case HLoadClass::LoadKind::kRuntimeCall:
7092 break;
7093 }
7094 return desired_class_load_kind;
7095 }
7096
7097 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
7098 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7099 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7100 InvokeRuntimeCallingConvention calling_convention;
7101 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
7102 cls,
7103 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
7104 Location::RegisterLocation(EAX));
7105 DCHECK_EQ(calling_convention.GetRegisterAt(0), EAX);
7106 return;
7107 }
7108 DCHECK_EQ(cls->NeedsAccessCheck(),
7109 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7110 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7111
7112 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
7113 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
7114 ? LocationSummary::kCallOnSlowPath
7115 : LocationSummary::kNoCall;
7116 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
7117 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
7118 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7119 }
7120
7121 if (load_kind == HLoadClass::LoadKind::kReferrersClass || cls->HasPcRelativeLoadKind()) {
7122 locations->SetInAt(0, Location::RequiresRegister());
7123 }
7124 locations->SetOut(Location::RequiresRegister());
7125 if (call_kind == LocationSummary::kCallOnSlowPath && cls->HasPcRelativeLoadKind()) {
7126 if (!kUseReadBarrier || kUseBakerReadBarrier) {
7127 // Rely on the type resolution and/or initialization to save everything.
7128 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7129 } else {
7130 // For non-Baker read barrier we have a temp-clobbering call.
7131 }
7132 }
7133 }
7134
7135 Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file,
7136 dex::TypeIndex type_index,
7137 Handle<mirror::Class> handle) {
7138 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
7139 // Add a patch entry and return the label.
7140 jit_class_patches_.emplace_back(&dex_file, type_index.index_);
7141 PatchInfo<Label>* info = &jit_class_patches_.back();
7142 return &info->label;
7143 }
7144
7145 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7146 // move.
7147 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
7148 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7149 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7150 codegen_->GenerateLoadClassRuntimeCall(cls);
7151 return;
7152 }
7153 DCHECK_EQ(cls->NeedsAccessCheck(),
7154 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7155 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7156
7157 LocationSummary* locations = cls->GetLocations();
7158 Location out_loc = locations->Out();
7159 Register out = out_loc.AsRegister<Register>();
7160
7161 bool generate_null_check = false;
7162 const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
7163 ? kWithoutReadBarrier
7164 : kCompilerReadBarrierOption;
7165 switch (load_kind) {
7166 case HLoadClass::LoadKind::kReferrersClass: {
7167 DCHECK(!cls->CanCallRuntime());
7168 DCHECK(!cls->MustGenerateClinitCheck());
7169 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
7170 Register current_method = locations->InAt(0).AsRegister<Register>();
7171 GenerateGcRootFieldLoad(
7172 cls,
7173 out_loc,
7174 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
7175 /* fixup_label= */ nullptr,
7176 read_barrier_option);
7177 break;
7178 }
7179 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
7180 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7181 codegen_->GetCompilerOptions().IsBootImageExtension());
7182 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7183 Register method_address = locations->InAt(0).AsRegister<Register>();
7184 __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7185 codegen_->RecordBootImageTypePatch(cls);
7186 break;
7187 }
7188 case HLoadClass::LoadKind::kBootImageRelRo: {
7189 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7190 Register method_address = locations->InAt(0).AsRegister<Register>();
7191 __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7192 codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(),
7193 CodeGenerator::GetBootImageOffset(cls));
7194 break;
7195 }
7196 case HLoadClass::LoadKind::kBssEntry:
7197 case HLoadClass::LoadKind::kBssEntryPublic:
7198 case HLoadClass::LoadKind::kBssEntryPackage: {
7199 Register method_address = locations->InAt(0).AsRegister<Register>();
7200 Address address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
7201 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
7202 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
7203 // No need for memory fence, thanks to the x86 memory model.
7204 generate_null_check = true;
7205 break;
7206 }
7207 case HLoadClass::LoadKind::kJitBootImageAddress: {
7208 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7209 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
7210 DCHECK_NE(address, 0u);
7211 __ movl(out, Immediate(address));
7212 break;
7213 }
7214 case HLoadClass::LoadKind::kJitTableAddress: {
7215 Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
7216 Label* fixup_label = codegen_->NewJitRootClassPatch(
7217 cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
7218 // /* GcRoot<mirror::Class> */ out = *address
7219 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
7220 break;
7221 }
7222 case HLoadClass::LoadKind::kRuntimeCall:
7223 case HLoadClass::LoadKind::kInvalid:
7224 LOG(FATAL) << "UNREACHABLE";
7225 UNREACHABLE();
7226 }
7227
7228 if (generate_null_check || cls->MustGenerateClinitCheck()) {
7229 DCHECK(cls->CanCallRuntime());
7230 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(cls, cls);
7231 codegen_->AddSlowPath(slow_path);
7232
7233 if (generate_null_check) {
7234 __ testl(out, out);
7235 __ j(kEqual, slow_path->GetEntryLabel());
7236 }
7237
7238 if (cls->MustGenerateClinitCheck()) {
7239 GenerateClassInitializationCheck(slow_path, out);
7240 } else {
7241 __ Bind(slow_path->GetExitLabel());
7242 }
7243 }
7244 }
7245
7246 void LocationsBuilderX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7247 InvokeRuntimeCallingConvention calling_convention;
7248 Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
7249 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
7250 }
7251
7252 void InstructionCodeGeneratorX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7253 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
7254 }
7255
7256 void LocationsBuilderX86::VisitLoadMethodType(HLoadMethodType* load) {
7257 InvokeRuntimeCallingConvention calling_convention;
7258 Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
7259 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
7260 }
7261
7262 void InstructionCodeGeneratorX86::VisitLoadMethodType(HLoadMethodType* load) {
7263 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
7264 }
7265
7266 void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) {
7267 LocationSummary* locations =
7268 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
7269 locations->SetInAt(0, Location::RequiresRegister());
7270 if (check->HasUses()) {
7271 locations->SetOut(Location::SameAsFirstInput());
7272 }
7273 // Rely on the type initialization to save everything we need.
7274 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7275 }
7276
7277 void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
7278 // We assume the class is not null.
7279 SlowPathCode* slow_path =
7280 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(check->GetLoadClass(), check);
7281 codegen_->AddSlowPath(slow_path);
7282 GenerateClassInitializationCheck(slow_path,
7283 check->GetLocations()->InAt(0).AsRegister<Register>());
7284 }
7285
7286 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
7287 SlowPathCode* slow_path, Register class_reg) {
7288 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
7289 const size_t status_byte_offset =
7290 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
7291 constexpr uint32_t shifted_visibly_initialized_value =
7292 enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
7293
7294 __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value));
7295 __ j(kBelow, slow_path->GetEntryLabel());
7296 __ Bind(slow_path->GetExitLabel());
7297 }
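// How the two instructions above work, assuming (as the offset arithmetic
// implies) that the ClassStatus enum lives in the most significant bits of
// the 32-bit status_ field: cmpb reads only the byte containing the status
// and compares it against kVisiblyInitialized shifted into that byte.
// Since kVisiblyInitialized is the highest status value, a single unsigned
// below test (kBelow) covers every not-yet-visibly-initialized state.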
7298
7299 void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
7300 Register temp) {
7301 uint32_t path_to_root = check->GetBitstringPathToRoot();
7302 uint32_t mask = check->GetBitstringMask();
7303 DCHECK(IsPowerOfTwo(mask + 1));
7304 size_t mask_bits = WhichPowerOf2(mask + 1);
7305
7306 if (mask_bits == 16u) {
7307 // Compare the bitstring in memory.
7308 __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
7309 } else {
7310 // /* uint32_t */ temp = temp->status_
7311 __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
7312 // Compare the bitstring bits using SUB.
7313 __ subl(temp, Immediate(path_to_root));
7314 // Shift out bits that do not contribute to the comparison.
7315 __ shll(temp, Immediate(32u - mask_bits));
7316 }
7317 }
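// Worked example for the SUB+SHL path with hypothetical values
// path_to_root = 0x0123 and mask = 0x0FFF (mask_bits = 12): after subl,
// the low 12 bits of temp are zero iff they equalled 0x0123; shll by
// 32 - 12 = 20 discards the unrelated high bits, so the zero flag
// (checked by the caller) is set exactly on a bitstring match.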
7318
7319 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
7320 HLoadString::LoadKind desired_string_load_kind) {
7321 switch (desired_string_load_kind) {
7322 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
7323 case HLoadString::LoadKind::kBootImageRelRo:
7324 case HLoadString::LoadKind::kBssEntry:
7325 DCHECK(!GetCompilerOptions().IsJitCompiler());
7326 break;
7327 case HLoadString::LoadKind::kJitBootImageAddress:
7328 case HLoadString::LoadKind::kJitTableAddress:
7329 DCHECK(GetCompilerOptions().IsJitCompiler());
7330 break;
7331 case HLoadString::LoadKind::kRuntimeCall:
7332 break;
7333 }
7334 return desired_string_load_kind;
7335 }
7336
7337 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
7338 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
7339 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
7340 HLoadString::LoadKind load_kind = load->GetLoadKind();
7341 if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
7342 load_kind == HLoadString::LoadKind::kBootImageRelRo ||
7343 load_kind == HLoadString::LoadKind::kBssEntry) {
7344 locations->SetInAt(0, Location::RequiresRegister());
7345 }
7346 if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
7347 locations->SetOut(Location::RegisterLocation(EAX));
7348 } else {
7349 locations->SetOut(Location::RequiresRegister());
7350 if (load_kind == HLoadString::LoadKind::kBssEntry) {
7351 if (!kUseReadBarrier || kUseBakerReadBarrier) {
7352 // Rely on the pResolveString to save everything.
7353 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7354 } else {
7355 // For non-Baker read barrier we have a temp-clobbering call.
7356 }
7357 }
7358 }
7359 }
7360
7361 Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file,
7362 dex::StringIndex string_index,
7363 Handle<mirror::String> handle) {
7364 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
7365 // Add a patch entry and return the label.
7366 jit_string_patches_.emplace_back(&dex_file, string_index.index_);
7367 PatchInfo<Label>* info = &jit_string_patches_.back();
7368 return &info->label;
7369 }
7370
7371 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7372 // move.
7373 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
7374 LocationSummary* locations = load->GetLocations();
7375 Location out_loc = locations->Out();
7376 Register out = out_loc.AsRegister<Register>();
7377
7378 switch (load->GetLoadKind()) {
7379 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
7380 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7381 codegen_->GetCompilerOptions().IsBootImageExtension());
7382 Register method_address = locations->InAt(0).AsRegister<Register>();
7383 __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7384 codegen_->RecordBootImageStringPatch(load);
7385 return;
7386 }
7387 case HLoadString::LoadKind::kBootImageRelRo: {
7388 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7389 Register method_address = locations->InAt(0).AsRegister<Register>();
7390 __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7391 codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(),
7392 CodeGenerator::GetBootImageOffset(load));
7393 return;
7394 }
7395 case HLoadString::LoadKind::kBssEntry: {
7396 Register method_address = locations->InAt(0).AsRegister<Register>();
7397 Address address = Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
7398 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
7399 // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
7400 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
7401 // No need for memory fence, thanks to the x86 memory model.
7402 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load);
7403 codegen_->AddSlowPath(slow_path);
7404 __ testl(out, out);
7405 __ j(kEqual, slow_path->GetEntryLabel());
7406 __ Bind(slow_path->GetExitLabel());
7407 return;
7408 }
7409 case HLoadString::LoadKind::kJitBootImageAddress: {
7410 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
7411 DCHECK_NE(address, 0u);
7412 __ movl(out, Immediate(address));
7413 return;
7414 }
7415 case HLoadString::LoadKind::kJitTableAddress: {
7416 Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
7417 Label* fixup_label = codegen_->NewJitRootStringPatch(
7418 load->GetDexFile(), load->GetStringIndex(), load->GetString());
7419 // /* GcRoot<mirror::String> */ out = *address
7420 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
7421 return;
7422 }
7423 default:
7424 break;
7425 }
7426
7427 // TODO: Re-add the compiler code to do string dex cache lookup again.
7428 InvokeRuntimeCallingConvention calling_convention;
7429 DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
7430 __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_));
7431 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
7432 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
7433 }
7434
7435 static Address GetExceptionTlsAddress() {
7436 return Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>().Int32Value());
7437 }
7438
7439 void LocationsBuilderX86::VisitLoadException(HLoadException* load) {
7440 LocationSummary* locations =
7441 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
7442 locations->SetOut(Location::RequiresRegister());
7443 }
7444
7445 void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) {
7446 __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), GetExceptionTlsAddress());
7447 }
7448
7449 void LocationsBuilderX86::VisitClearException(HClearException* clear) {
7450 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
7451 }
7452
7453 void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
7454 __ fs()->movl(GetExceptionTlsAddress(), Immediate(0));
7455 }
7456
7457 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
7458 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7459 instruction, LocationSummary::kCallOnMainOnly);
7460 InvokeRuntimeCallingConvention calling_convention;
7461 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7462 }
7463
7464 void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
7465 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
7466 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
7467 }
7468
7469 // Temp is used for read barrier.
7470 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
7471 if (kEmitCompilerReadBarrier &&
7472 !kUseBakerReadBarrier &&
7473 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7474 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7475 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7476 return 1;
7477 }
7478 return 0;
7479 }
7480
7481 // The interface case has 2 temps: one to hold the number of interfaces and one for the current
7482 // interface pointer; the current interface is compared in memory.
7483 // The other checks have one temp for loading the object's class.
7484 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
7485 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7486 return 2;
7487 }
7488 return 1 + NumberOfInstanceOfTemps(type_check_kind);
7489 }
7490
7491 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
7492 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7493 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7494 bool baker_read_barrier_slow_path = false;
7495 switch (type_check_kind) {
7496 case TypeCheckKind::kExactCheck:
7497 case TypeCheckKind::kAbstractClassCheck:
7498 case TypeCheckKind::kClassHierarchyCheck:
7499 case TypeCheckKind::kArrayObjectCheck: {
7500 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
7501 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7502 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
7503 break;
7504 }
7505 case TypeCheckKind::kArrayCheck:
7506 case TypeCheckKind::kUnresolvedCheck:
7507 case TypeCheckKind::kInterfaceCheck:
7508 call_kind = LocationSummary::kCallOnSlowPath;
7509 break;
7510 case TypeCheckKind::kBitstringCheck:
7511 break;
7512 }
7513
7514 LocationSummary* locations =
7515 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7516 if (baker_read_barrier_slow_path) {
7517 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7518 }
7519 locations->SetInAt(0, Location::RequiresRegister());
7520 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7521 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
7522 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
7523 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
7524 } else {
7525 locations->SetInAt(1, Location::Any());
7526 }
7527 // Note that TypeCheckSlowPathX86 uses this "out" register too.
7528 locations->SetOut(Location::RequiresRegister());
7529 // When read barriers are enabled, we need a temporary register for some cases.
7530 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
7531 }
7532
7533 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
7534 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7535 LocationSummary* locations = instruction->GetLocations();
7536 Location obj_loc = locations->InAt(0);
7537 Register obj = obj_loc.AsRegister<Register>();
7538 Location cls = locations->InAt(1);
7539 Location out_loc = locations->Out();
7540 Register out = out_loc.AsRegister<Register>();
7541 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
7542 DCHECK_LE(num_temps, 1u);
7543 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
7544 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7545 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7546 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7547 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7548 SlowPathCode* slow_path = nullptr;
7549 NearLabel done, zero;
7550
7551 // Return 0 if `obj` is null.
7552 // Avoid null check if we know obj is not null.
7553 if (instruction->MustDoNullCheck()) {
7554 __ testl(obj, obj);
7555 __ j(kEqual, &zero);
7556 }
7557
7558 switch (type_check_kind) {
7559 case TypeCheckKind::kExactCheck: {
7560 ReadBarrierOption read_barrier_option =
7561 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7562 // /* HeapReference<Class> */ out = obj->klass_
7563 GenerateReferenceLoadTwoRegisters(instruction,
7564 out_loc,
7565 obj_loc,
7566 class_offset,
7567 read_barrier_option);
7568 if (cls.IsRegister()) {
7569 __ cmpl(out, cls.AsRegister<Register>());
7570 } else {
7571 DCHECK(cls.IsStackSlot()) << cls;
7572 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7573 }
7574
7575 // Classes must be equal for the instanceof to succeed.
7576 __ j(kNotEqual, &zero);
7577 __ movl(out, Immediate(1));
7578 __ jmp(&done);
7579 break;
7580 }
7581
7582 case TypeCheckKind::kAbstractClassCheck: {
7583 ReadBarrierOption read_barrier_option =
7584 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7585 // /* HeapReference<Class> */ out = obj->klass_
7586 GenerateReferenceLoadTwoRegisters(instruction,
7587 out_loc,
7588 obj_loc,
7589 class_offset,
7590 read_barrier_option);
7591 // If the class is abstract, we eagerly fetch the super class of the
7592 // object to avoid doing a comparison we know will fail.
7593 NearLabel loop;
7594 __ Bind(&loop);
7595 // /* HeapReference<Class> */ out = out->super_class_
7596 GenerateReferenceLoadOneRegister(instruction,
7597 out_loc,
7598 super_offset,
7599 maybe_temp_loc,
7600 read_barrier_option);
7601 __ testl(out, out);
7602 // If `out` is null, we use it for the result, and jump to `done`.
7603 __ j(kEqual, &done);
7604 if (cls.IsRegister()) {
7605 __ cmpl(out, cls.AsRegister<Register>());
7606 } else {
7607 DCHECK(cls.IsStackSlot()) << cls;
7608 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7609 }
7610 __ j(kNotEqual, &loop);
7611 __ movl(out, Immediate(1));
7612 if (zero.IsLinked()) {
7613 __ jmp(&done);
7614 }
7615 break;
7616 }
7617
7618 case TypeCheckKind::kClassHierarchyCheck: {
7619 ReadBarrierOption read_barrier_option =
7620 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7621 // /* HeapReference<Class> */ out = obj->klass_
7622 GenerateReferenceLoadTwoRegisters(instruction,
7623 out_loc,
7624 obj_loc,
7625 class_offset,
7626 read_barrier_option);
7627 // Walk over the class hierarchy to find a match.
7628 NearLabel loop, success;
7629 __ Bind(&loop);
7630 if (cls.IsRegister()) {
7631 __ cmpl(out, cls.AsRegister<Register>());
7632 } else {
7633 DCHECK(cls.IsStackSlot()) << cls;
7634 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7635 }
7636 __ j(kEqual, &success);
7637 // /* HeapReference<Class> */ out = out->super_class_
7638 GenerateReferenceLoadOneRegister(instruction,
7639 out_loc,
7640 super_offset,
7641 maybe_temp_loc,
7642 read_barrier_option);
7643 __ testl(out, out);
7644 __ j(kNotEqual, &loop);
7645 // If `out` is null, we use it for the result, and jump to `done`.
7646 __ jmp(&done);
7647 __ Bind(&success);
7648 __ movl(out, Immediate(1));
7649 if (zero.IsLinked()) {
7650 __ jmp(&done);
7651 }
7652 break;
7653 }
7654
7655 case TypeCheckKind::kArrayObjectCheck: {
7656 ReadBarrierOption read_barrier_option =
7657 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7658 // /* HeapReference<Class> */ out = obj->klass_
7659 GenerateReferenceLoadTwoRegisters(instruction,
7660 out_loc,
7661 obj_loc,
7662 class_offset,
7663 read_barrier_option);
7664 // Do an exact check.
7665 NearLabel exact_check;
7666 if (cls.IsRegister()) {
7667 __ cmpl(out, cls.AsRegister<Register>());
7668 } else {
7669 DCHECK(cls.IsStackSlot()) << cls;
7670 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7671 }
7672 __ j(kEqual, &exact_check);
7673 // Otherwise, we need to check that the object's class is a non-primitive array.
7674 // /* HeapReference<Class> */ out = out->component_type_
7675 GenerateReferenceLoadOneRegister(instruction,
7676 out_loc,
7677 component_offset,
7678 maybe_temp_loc,
7679 read_barrier_option);
7680 __ testl(out, out);
7681 // If `out` is null, we use it for the result, and jump to `done`.
7682 __ j(kEqual, &done);
7683 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
7684 __ j(kNotEqual, &zero);
7685 __ Bind(&exact_check);
7686 __ movl(out, Immediate(1));
7687 __ jmp(&done);
7688 break;
7689 }
7690
7691 case TypeCheckKind::kArrayCheck: {
7692 // No read barrier since the slow path will retry upon failure.
7693 // /* HeapReference<Class> */ out = obj->klass_
7694 GenerateReferenceLoadTwoRegisters(instruction,
7695 out_loc,
7696 obj_loc,
7697 class_offset,
7698 kWithoutReadBarrier);
7699 if (cls.IsRegister()) {
7700 __ cmpl(out, cls.AsRegister<Register>());
7701 } else {
7702 DCHECK(cls.IsStackSlot()) << cls;
7703 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7704 }
7705 DCHECK(locations->OnlyCallsOnSlowPath());
7706 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7707 instruction, /* is_fatal= */ false);
7708 codegen_->AddSlowPath(slow_path);
7709 __ j(kNotEqual, slow_path->GetEntryLabel());
7710 __ movl(out, Immediate(1));
7711 if (zero.IsLinked()) {
7712 __ jmp(&done);
7713 }
7714 break;
7715 }
7716
7717 case TypeCheckKind::kUnresolvedCheck:
7718 case TypeCheckKind::kInterfaceCheck: {
7719 // Note that we indeed only call on slow path, but we always go
7720 // into the slow path for the unresolved and interface check
7721 // cases.
7722 //
7723 // We cannot directly call the InstanceofNonTrivial runtime
7724 // entry point without resorting to a type checking slow path
7725 // here (i.e. by calling InvokeRuntime directly), as it would
7726 // require assigning fixed registers for the inputs of this
7727 // HInstanceOf instruction (following the runtime calling
7728 // convention), which might be cluttered by the potential first
7729 // read barrier emission at the beginning of this method.
7730 //
7731 // TODO: Introduce a new runtime entry point taking the object
7732 // to test (instead of its class) as argument, and let it deal
7733 // with the read barrier issues. This will let us refactor this
7734 // case of the `switch` code as it was previously (with a direct
7735 // call to the runtime not using a type checking slow path).
7736 // This should also be beneficial for the other cases above.
7737 DCHECK(locations->OnlyCallsOnSlowPath());
7738 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7739 instruction, /* is_fatal= */ false);
7740 codegen_->AddSlowPath(slow_path);
7741 __ jmp(slow_path->GetEntryLabel());
7742 if (zero.IsLinked()) {
7743 __ jmp(&done);
7744 }
7745 break;
7746 }
7747
7748 case TypeCheckKind::kBitstringCheck: {
7749 // /* HeapReference<Class> */ out = obj->klass_
7750 GenerateReferenceLoadTwoRegisters(instruction,
7751 out_loc,
7752 obj_loc,
7753 class_offset,
7754 kWithoutReadBarrier);
7755
7756 GenerateBitstringTypeCheckCompare(instruction, out);
7757 __ j(kNotEqual, &zero);
7758 __ movl(out, Immediate(1));
7759 __ jmp(&done);
7760 break;
7761 }
7762 }
7763
7764 if (zero.IsLinked()) {
7765 __ Bind(&zero);
7766 __ xorl(out, out);
7767 }
7768
7769 if (done.IsLinked()) {
7770 __ Bind(&done);
7771 }
7772
7773 if (slow_path != nullptr) {
7774 __ Bind(slow_path->GetExitLabel());
7775 }
7776 }
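// Shape of the fast path emitted for a kExactCheck with a null check, using
// arbitrary example registers (obj in EAX, cls in ECX, out in EAX):
//   testl %eax, %eax        // a null obj is never an instance
//   je .zero
//   movl (%eax), %eax       // out = obj->klass_ (plus read barrier if needed)
//   cmpl %ecx, %eax
//   jne .zero               // classes must match exactly
//   movl $1, %eax
//   jmp .done
// .zero:
//   xorl %eax, %eax
// .done: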
7777
7778 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
7779 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7780 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
7781 LocationSummary* locations =
7782 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7783 locations->SetInAt(0, Location::RequiresRegister());
7784 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7785 // Require a register for the interface check since there is a loop that compares the class to
7786 // a memory address.
7787 locations->SetInAt(1, Location::RequiresRegister());
7788 } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7789 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
7790 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
7791 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
7792 } else {
7793 locations->SetInAt(1, Location::Any());
7794 }
7795 // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86.
7796 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
7797 }
7798
7799 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
7800 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7801 LocationSummary* locations = instruction->GetLocations();
7802 Location obj_loc = locations->InAt(0);
7803 Register obj = obj_loc.AsRegister<Register>();
7804 Location cls = locations->InAt(1);
7805 Location temp_loc = locations->GetTemp(0);
7806 Register temp = temp_loc.AsRegister<Register>();
7807 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
7808 DCHECK_GE(num_temps, 1u);
7809 DCHECK_LE(num_temps, 2u);
7810 Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
7811 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7812 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7813 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7814 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7815 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7816 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7817 const uint32_t object_array_data_offset =
7818 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7819
7820 bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
7821 SlowPathCode* type_check_slow_path =
7822 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7823 instruction, is_type_check_slow_path_fatal);
7824 codegen_->AddSlowPath(type_check_slow_path);
7825
7826 NearLabel done;
7827 // Avoid null check if we know obj is not null.
7828 if (instruction->MustDoNullCheck()) {
7829 __ testl(obj, obj);
7830 __ j(kEqual, &done);
7831 }
7832
7833 switch (type_check_kind) {
7834 case TypeCheckKind::kExactCheck:
7835 case TypeCheckKind::kArrayCheck: {
7836 // /* HeapReference<Class> */ temp = obj->klass_
7837 GenerateReferenceLoadTwoRegisters(instruction,
7838 temp_loc,
7839 obj_loc,
7840 class_offset,
7841 kWithoutReadBarrier);
7842
7843 if (cls.IsRegister()) {
7844 __ cmpl(temp, cls.AsRegister<Register>());
7845 } else {
7846 DCHECK(cls.IsStackSlot()) << cls;
7847 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7848 }
7849 // Jump to slow path for throwing the exception or doing a
7850 // more involved array check.
7851 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7852 break;
7853 }
7854
7855 case TypeCheckKind::kAbstractClassCheck: {
7856 // /* HeapReference<Class> */ temp = obj->klass_
7857 GenerateReferenceLoadTwoRegisters(instruction,
7858 temp_loc,
7859 obj_loc,
7860 class_offset,
7861 kWithoutReadBarrier);
7862
7863 // If the class is abstract, we eagerly fetch the super class of the
7864 // object to avoid doing a comparison we know will fail.
7865 NearLabel loop;
7866 __ Bind(&loop);
7867 // /* HeapReference<Class> */ temp = temp->super_class_
7868 GenerateReferenceLoadOneRegister(instruction,
7869 temp_loc,
7870 super_offset,
7871 maybe_temp2_loc,
7872 kWithoutReadBarrier);
7873
7874 // If the class reference currently in `temp` is null, jump to the slow path to throw the
7875 // exception.
7876 __ testl(temp, temp);
7877 __ j(kZero, type_check_slow_path->GetEntryLabel());
7878
7879 // Otherwise, compare the classes.
7880 if (cls.IsRegister()) {
7881 __ cmpl(temp, cls.AsRegister<Register>());
7882 } else {
7883 DCHECK(cls.IsStackSlot()) << cls;
7884 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7885 }
7886 __ j(kNotEqual, &loop);
7887 break;
7888 }
7889
7890 case TypeCheckKind::kClassHierarchyCheck: {
7891 // /* HeapReference<Class> */ temp = obj->klass_
7892 GenerateReferenceLoadTwoRegisters(instruction,
7893 temp_loc,
7894 obj_loc,
7895 class_offset,
7896 kWithoutReadBarrier);
7897
7898 // Walk over the class hierarchy to find a match.
7899 NearLabel loop;
7900 __ Bind(&loop);
7901 if (cls.IsRegister()) {
7902 __ cmpl(temp, cls.AsRegister<Register>());
7903 } else {
7904 DCHECK(cls.IsStackSlot()) << cls;
7905 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7906 }
7907 __ j(kEqual, &done);
7908
7909 // /* HeapReference<Class> */ temp = temp->super_class_
7910 GenerateReferenceLoadOneRegister(instruction,
7911 temp_loc,
7912 super_offset,
7913 maybe_temp2_loc,
7914 kWithoutReadBarrier);
7915
7916 // If the class reference currently in `temp` is not null, jump
7917 // back to the beginning of the loop.
7918 __ testl(temp, temp);
7919 __ j(kNotZero, &loop);
7920 // Otherwise, jump to the slow path to throw the exception.
7921 __ jmp(type_check_slow_path->GetEntryLabel());
7922 break;
7923 }
7924
7925 case TypeCheckKind::kArrayObjectCheck: {
7926 // /* HeapReference<Class> */ temp = obj->klass_
7927 GenerateReferenceLoadTwoRegisters(instruction,
7928 temp_loc,
7929 obj_loc,
7930 class_offset,
7931 kWithoutReadBarrier);
7932
7933 // Do an exact check.
7934 if (cls.IsRegister()) {
7935 __ cmpl(temp, cls.AsRegister<Register>());
7936 } else {
7937 DCHECK(cls.IsStackSlot()) << cls;
7938 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7939 }
7940 __ j(kEqual, &done);
7941
7942 // Otherwise, we need to check that the object's class is a non-primitive array.
7943 // /* HeapReference<Class> */ temp = temp->component_type_
7944 GenerateReferenceLoadOneRegister(instruction,
7945 temp_loc,
7946 component_offset,
7947 maybe_temp2_loc,
7948 kWithoutReadBarrier);
7949
7950 // If the component type is null (i.e. the object is not an array), jump to the slow path to
7951 // throw the exception. Otherwise proceed with the check.
7952 __ testl(temp, temp);
7953 __ j(kZero, type_check_slow_path->GetEntryLabel());
7954
7955 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
7956 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7957 break;
7958 }
7959
7960 case TypeCheckKind::kUnresolvedCheck:
7961 // We always go into the type check slow path for the unresolved check case.
7962 // We cannot directly call the CheckCast runtime entry point
7963 // without resorting to a type checking slow path here (i.e. by
7964 // calling InvokeRuntime directly), as it would require
7965 // assigning fixed registers for the inputs of this HCheckCast
7966 // instruction (following the runtime calling convention), which
7967 // might be cluttered by the potential first read barrier
7968 // emission at the beginning of this method.
7969 __ jmp(type_check_slow_path->GetEntryLabel());
7970 break;
7971
7972 case TypeCheckKind::kInterfaceCheck: {
7973 // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
7974 // We cannot get false positives by doing this.
7975 // /* HeapReference<Class> */ temp = obj->klass_
7976 GenerateReferenceLoadTwoRegisters(instruction,
7977 temp_loc,
7978 obj_loc,
7979 class_offset,
7980 kWithoutReadBarrier);
7981
7982 // /* HeapReference<Class> */ temp = temp->iftable_
7983 GenerateReferenceLoadTwoRegisters(instruction,
7984 temp_loc,
7985 temp_loc,
7986 iftable_offset,
7987 kWithoutReadBarrier);
7988 // Iftable is never null.
7989 __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
7990 // Maybe poison the `cls` for direct comparison with memory.
7991 __ MaybePoisonHeapReference(cls.AsRegister<Register>());
7992 // Loop through the iftable and check if any class matches.
7993 NearLabel start_loop;
7994 __ Bind(&start_loop);
7995 // Need to subtract first to handle the empty array case.
7996 __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2));
7997 __ j(kNegative, type_check_slow_path->GetEntryLabel());
7998 // Go to next interface if the classes do not match.
7999 __ cmpl(cls.AsRegister<Register>(),
8000 CodeGeneratorX86::ArrayAddress(temp,
8001 maybe_temp2_loc,
8002 TIMES_4,
8003 object_array_data_offset));
8004 __ j(kNotEqual, &start_loop);
8005 // If `cls` was poisoned above, unpoison it.
8006 __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
8007 break;
8008 }
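// The loop above relies on the IfTable layout: a flat object array holding
// (interface class, method array) pairs. That is why the remaining count
// drops by 2 per iteration, indexing scales by TIMES_4 (32-bit heap
// references), and subtracting before the compare handles an empty table.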
8009
8010 case TypeCheckKind::kBitstringCheck: {
8011 // /* HeapReference<Class> */ temp = obj->klass_
8012 GenerateReferenceLoadTwoRegisters(instruction,
8013 temp_loc,
8014 obj_loc,
8015 class_offset,
8016 kWithoutReadBarrier);
8017
8018 GenerateBitstringTypeCheckCompare(instruction, temp);
8019 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
8020 break;
8021 }
8022 }
8023 __ Bind(&done);
8024
8025 __ Bind(type_check_slow_path->GetExitLabel());
8026 }
8027
8028 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
8029 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
8030 instruction, LocationSummary::kCallOnMainOnly);
8031 InvokeRuntimeCallingConvention calling_convention;
8032 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
8033 }
8034
8035 void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instruction) {
8036 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject
8037 : kQuickUnlockObject,
8038 instruction,
8039 instruction->GetDexPc());
8040 if (instruction->IsEnter()) {
8041 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
8042 } else {
8043 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
8044 }
8045 }
8046
8047 void LocationsBuilderX86::VisitX86AndNot(HX86AndNot* instruction) {
8048 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
8049 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
8050 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
8051 locations->SetInAt(0, Location::RequiresRegister());
8052 locations->SetInAt(1, Location::RequiresRegister());
8053 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8054 }
8055
8056 void InstructionCodeGeneratorX86::VisitX86AndNot(HX86AndNot* instruction) {
8057 LocationSummary* locations = instruction->GetLocations();
8058 Location first = locations->InAt(0);
8059 Location second = locations->InAt(1);
8060 Location dest = locations->Out();
8061 if (instruction->GetResultType() == DataType::Type::kInt32) {
8062 __ andn(dest.AsRegister<Register>(),
8063 first.AsRegister<Register>(),
8064 second.AsRegister<Register>());
8065 } else {
8066 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8067 __ andn(dest.AsRegisterPairLow<Register>(),
8068 first.AsRegisterPairLow<Register>(),
8069 second.AsRegisterPairLow<Register>());
8070 __ andn(dest.AsRegisterPairHigh<Register>(),
8071 first.AsRegisterPairHigh<Register>(),
8072 second.AsRegisterPairHigh<Register>());
8073 }
8074 }
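// BMI ANDN computes dest = ~src1 & src2 in a single instruction, e.g. with
// first = 0b1100 and second = 0b1010 the result is ~0b1100 & 0b1010 = 0b0010;
// the 64-bit case simply applies it to the low and high words independently.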
8075
8076 void LocationsBuilderX86::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
8077 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
8078 DCHECK(instruction->GetType() == DataType::Type::kInt32) << instruction->GetType();
8079 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
8080 locations->SetInAt(0, Location::RequiresRegister());
8081 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8082 }
8083
8084 void InstructionCodeGeneratorX86::VisitX86MaskOrResetLeastSetBit(
8085 HX86MaskOrResetLeastSetBit* instruction) {
8086 LocationSummary* locations = instruction->GetLocations();
8087 Location src = locations->InAt(0);
8088 Location dest = locations->Out();
8089 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
8090 switch (instruction->GetOpKind()) {
8091 case HInstruction::kAnd:
8092 __ blsr(dest.AsRegister<Register>(), src.AsRegister<Register>());
8093 break;
8094 case HInstruction::kXor:
8095 __ blsmsk(dest.AsRegister<Register>(), src.AsRegister<Register>());
8096 break;
8097 default:
8098 LOG(FATAL) << "Unreachable";
8099 }
8100 }
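// Illustrative values for x = 0b0110:
//   blsr   (kAnd): x & (x - 1) = 0b0100  // clears the lowest set bit
//   blsmsk (kXor): x ^ (x - 1) = 0b0011  // mask up to and including it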
8101
8102 void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
8103 void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
8104 void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
8105
8106 void LocationsBuilderX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
8107 LocationSummary* locations =
8108 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8109 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8110 || instruction->GetResultType() == DataType::Type::kInt64);
8111 locations->SetInAt(0, Location::RequiresRegister());
8112 locations->SetInAt(1, Location::Any());
8113 locations->SetOut(Location::SameAsFirstInput());
8114 }
8115
8116 void InstructionCodeGeneratorX86::VisitAnd(HAnd* instruction) {
8117 HandleBitwiseOperation(instruction);
8118 }
8119
8120 void InstructionCodeGeneratorX86::VisitOr(HOr* instruction) {
8121 HandleBitwiseOperation(instruction);
8122 }
8123
8124 void InstructionCodeGeneratorX86::VisitXor(HXor* instruction) {
8125 HandleBitwiseOperation(instruction);
8126 }
8127
8128 void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
8129 LocationSummary* locations = instruction->GetLocations();
8130 Location first = locations->InAt(0);
8131 Location second = locations->InAt(1);
8132 DCHECK(first.Equals(locations->Out()));
8133
8134 if (instruction->GetResultType() == DataType::Type::kInt32) {
8135 if (second.IsRegister()) {
8136 if (instruction->IsAnd()) {
8137 __ andl(first.AsRegister<Register>(), second.AsRegister<Register>());
8138 } else if (instruction->IsOr()) {
8139 __ orl(first.AsRegister<Register>(), second.AsRegister<Register>());
8140 } else {
8141 DCHECK(instruction->IsXor());
8142 __ xorl(first.AsRegister<Register>(), second.AsRegister<Register>());
8143 }
8144 } else if (second.IsConstant()) {
8145 if (instruction->IsAnd()) {
8146 __ andl(first.AsRegister<Register>(),
8147 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8148 } else if (instruction->IsOr()) {
8149 __ orl(first.AsRegister<Register>(),
8150 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8151 } else {
8152 DCHECK(instruction->IsXor());
8153 __ xorl(first.AsRegister<Register>(),
8154 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8155 }
8156 } else {
8157 if (instruction->IsAnd()) {
8158 __ andl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8159 } else if (instruction->IsOr()) {
8160 __ orl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8161 } else {
8162 DCHECK(instruction->IsXor());
8163 __ xorl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8164 }
8165 }
8166 } else {
8167 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8168 if (second.IsRegisterPair()) {
8169 if (instruction->IsAnd()) {
8170 __ andl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8171 __ andl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8172 } else if (instruction->IsOr()) {
8173 __ orl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8174 __ orl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8175 } else {
8176 DCHECK(instruction->IsXor());
8177 __ xorl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8178 __ xorl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8179 }
8180 } else if (second.IsDoubleStackSlot()) {
8181 if (instruction->IsAnd()) {
8182 __ andl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8183 __ andl(first.AsRegisterPairHigh<Register>(),
8184 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8185 } else if (instruction->IsOr()) {
8186 __ orl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8187 __ orl(first.AsRegisterPairHigh<Register>(),
8188 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8189 } else {
8190 DCHECK(instruction->IsXor());
8191 __ xorl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8192 __ xorl(first.AsRegisterPairHigh<Register>(),
8193 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8194 }
8195 } else {
8196 DCHECK(second.IsConstant()) << second;
8197 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
8198 int32_t low_value = Low32Bits(value);
8199 int32_t high_value = High32Bits(value);
8200 Immediate low(low_value);
8201 Immediate high(high_value);
8202 Register first_low = first.AsRegisterPairLow<Register>();
8203 Register first_high = first.AsRegisterPairHigh<Register>();
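      // With a constant operand, each 32-bit half can often be simplified:
      // AND with all ones and OR/XOR with zero are no-ops and are elided;
      // AND with zero is emitted as a shorter, dependency-breaking xorl.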
      if (instruction->IsAnd()) {
        if (low_value == 0) {
          __ xorl(first_low, first_low);
        } else if (low_value != -1) {
          __ andl(first_low, low);
        }
        if (high_value == 0) {
          __ xorl(first_high, first_high);
        } else if (high_value != -1) {
          __ andl(first_high, high);
        }
      } else if (instruction->IsOr()) {
        if (low_value != 0) {
          __ orl(first_low, low);
        }
        if (high_value != 0) {
          __ orl(first_high, high);
        }
      } else {
        DCHECK(instruction->IsXor());
        if (low_value != 0) {
          __ xorl(first_low, low);
        }
        if (high_value != 0) {
          __ xorl(first_high, high);
        }
      }
    }
  }
}

void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
    HInstruction* instruction,
    Location out,
    uint32_t offset,
    Location maybe_temp,
    ReadBarrierOption read_barrier_option) {
  Register out_reg = out.AsRegister<Register>();
  if (read_barrier_option == kWithReadBarrier) {
    CHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Load with fast path based Baker's read barrier.
      // /* HeapReference<Object> */ out = *(out + offset)
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          instruction, out, out_reg, offset, /* needs_null_check= */ false);
    } else {
      // Load with slow path based read barrier.
      // Save the value of `out` into `maybe_temp` before overwriting it
      // in the following move operation, as we will need it for the
      // read barrier below.
      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
      __ movl(maybe_temp.AsRegister<Register>(), out_reg);
      // /* HeapReference<Object> */ out = *(out + offset)
      __ movl(out_reg, Address(out_reg, offset));
      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
    }
  } else {
    // Plain load with no read barrier.
    // /* HeapReference<Object> */ out = *(out + offset)
    __ movl(out_reg, Address(out_reg, offset));
    __ MaybeUnpoisonHeapReference(out_reg);
  }
}

void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
    HInstruction* instruction,
    Location out,
    Location obj,
    uint32_t offset,
    ReadBarrierOption read_barrier_option) {
  Register out_reg = out.AsRegister<Register>();
  Register obj_reg = obj.AsRegister<Register>();
  if (read_barrier_option == kWithReadBarrier) {
    CHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Load with fast path based Baker's read barrier.
      // /* HeapReference<Object> */ out = *(obj + offset)
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          instruction, out, obj_reg, offset, /* needs_null_check= */ false);
    } else {
      // Load with slow path based read barrier.
      // /* HeapReference<Object> */ out = *(obj + offset)
      __ movl(out_reg, Address(obj_reg, offset));
      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
    }
  } else {
    // Plain load with no read barrier.
    // /* HeapReference<Object> */ out = *(obj + offset)
    __ movl(out_reg, Address(obj_reg, offset));
    __ MaybeUnpoisonHeapReference(out_reg);
  }
}

void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
    HInstruction* instruction,
    Location root,
    const Address& address,
    Label* fixup_label,
    ReadBarrierOption read_barrier_option) {
  Register root_reg = root.AsRegister<Register>();
  if (read_barrier_option == kWithReadBarrier) {
    DCHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
      // Baker's read barriers are used:
      //
      //   root = obj.field;
      //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
      //   if (temp != null) {
      //     root = temp(root)
      //   }

      // /* GcRoot<mirror::Object> */ root = *address
      __ movl(root_reg, address);
      if (fixup_label != nullptr) {
        __ Bind(fixup_label);
      }
      static_assert(
          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
          "have different sizes.");
      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
                    "have different sizes.");

      // Slow path marking the GC root `root`.
      SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
          instruction, root, /* unpoison_ref_before_marking= */ false);
      codegen_->AddSlowPath(slow_path);

      // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
      const int32_t entry_point_offset =
          Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
      __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
      // The entrypoint is null when the GC is not marking.
      __ j(kNotEqual, slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
    } else {
      // GC root loaded through a slow path for read barriers other
      // than Baker's.
      // /* GcRoot<mirror::Object>* */ root = address
      __ leal(root_reg, address);
      if (fixup_label != nullptr) {
        __ Bind(fixup_label);
      }
      // /* mirror::Object* */ root = root->Read()
      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
    }
  } else {
    // Plain GC root load with no read barrier.
    // /* GcRoot<mirror::Object> */ root = *address
    __ movl(root_reg, address);
    if (fixup_label != nullptr) {
      __ Bind(fixup_label);
    }
    // Note that GC roots are not affected by heap poisoning, thus we
    // do not have to unpoison `root_reg` here.
  }
}

void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                                             Location ref,
                                                             Register obj,
                                                             uint32_t offset,
                                                             bool needs_null_check) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  // /* HeapReference<Object> */ ref = *(obj + offset)
  Address src(obj, offset);
  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
}

void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                                             Location ref,
                                                             Register obj,
                                                             uint32_t data_offset,
                                                             Location index,
                                                             bool needs_null_check) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  static_assert(
      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
  // /* HeapReference<Object> */ ref =
  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
  Address src = CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset);
  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
}

void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                 Location ref,
                                                                 Register obj,
                                                                 const Address& src,
                                                                 bool needs_null_check,
                                                                 bool always_update_field,
                                                                 Register* temp) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  // In slow path based read barriers, the read barrier call is
  // inserted after the original load. However, in fast path based
  // Baker's read barriers, we need to perform the load of
  // mirror::Object::monitor_ *before* the original reference load.
  // This load-load ordering is required by the read barrier.
  // The fast path/slow path (for Baker's algorithm) should look like:
  //
  //   uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
  //   HeapReference<Object> ref = *src;  // Original reference load.
  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
  //   if (is_gray) {
  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
  //   }
  //
  // Note: the original implementation in ReadBarrier::Barrier is
  // slightly more complex as:
  // - it implements the load-load fence using a data dependency on
  //   the high-bits of rb_state, which are expected to be all zeroes
  //   (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
  //   which is a no-op thanks to the x86 memory model);
  // - it performs additional checks that we do not do here for
  //   performance reasons.

  Register ref_reg = ref.AsRegister<Register>();
  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();

  // Given the numeric representation, it's enough to check the low bit of the rb_state.
  static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
  constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
  constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
  constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
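  // For example, if LockWord::kReadBarrierStateShift were 28, the testb below
  // would probe bit 4 of byte 3 of the lock word (test_value == 0x10).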

  // if (rb_state == ReadBarrier::GrayState())
  //   ref = ReadBarrier::Mark(ref);
  // At this point, just do the "if" and make sure that flags are preserved until the branch.
  __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
  if (needs_null_check) {
    MaybeRecordImplicitNullCheck(instruction);
  }

  // Load fence to prevent load-load reordering.
  // Note that this is a no-op, thanks to the x86 memory model.
  GenerateMemoryBarrier(MemBarrierKind::kLoadAny);

  // The actual reference load.
  // /* HeapReference<Object> */ ref = *src
  __ movl(ref_reg, src);  // Flags are unaffected.

  // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
  // Slow path marking the object `ref` when it is gray.
  SlowPathCode* slow_path;
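  // `always_update_field` is typically set by compare-and-set style intrinsics:
  // besides marking `ref`, the updating slow path must also write the marked
  // reference back to the holder's field, which is what the extra temp is for.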
  if (always_update_field) {
    DCHECK(temp != nullptr);
    slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86(
        instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp);
  } else {
    slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
        instruction, ref, /* unpoison_ref_before_marking= */ true);
  }
  AddSlowPath(slow_path);

  // We have done the "if" of the gray bit check above, now branch based on the flags.
  __ j(kNotZero, slow_path->GetEntryLabel());

  // Object* ref = ref_addr->AsMirrorPtr()
  __ MaybeUnpoisonHeapReference(ref_reg);

  __ Bind(slow_path->GetExitLabel());
}

void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
                                               Location out,
                                               Location ref,
                                               Location obj,
                                               uint32_t offset,
                                               Location index) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the reference load.
  //
  // If heap poisoning is enabled, the unpoisoning of the loaded
  // reference will be carried out by the runtime within the slow
  // path.
  //
  // Note that `ref` currently does not get unpoisoned (when heap
  // poisoning is enabled), which is alright as the `ref` argument is
  // not used by the artReadBarrierSlow entry point.
  //
  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
  SlowPathCode* slow_path = new (GetScopedAllocator())
      ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
  AddSlowPath(slow_path);

  __ jmp(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                                    Location out,
                                                    Location ref,
                                                    Location obj,
                                                    uint32_t offset,
                                                    Location index) {
  if (kEmitCompilerReadBarrier) {
    // Baker's read barriers shall be handled by the fast path
    // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
    DCHECK(!kUseBakerReadBarrier);
    // If heap poisoning is enabled, unpoisoning will be taken care of
    // by the runtime within the slow path.
    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
  } else if (kPoisonHeapReferences) {
    __ UnpoisonHeapReference(out.AsRegister<Register>());
  }
}

void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
                                                      Location out,
                                                      Location root) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the GC root load.
  //
  // Note that GC roots are not affected by heap poisoning, so we do
  // not need to do anything special for this here.
  SlowPathCode* slow_path =
      new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86(instruction, out, root);
  AddSlowPath(slow_path);

  __ jmp(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
  // Nothing to do, this should be removed during prepare for register allocator.
  LOG(FATAL) << "Unreachable";
}

void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
  // Nothing to do, this should be removed during prepare for register allocator.
  LOG(FATAL) << "Unreachable";
}

// Simple implementation of packed switch - generate cascaded compare/jumps.
void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
}

void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
                                                              int32_t lower_bound,
                                                              uint32_t num_entries,
                                                              HBasicBlock* switch_block,
                                                              HBasicBlock* default_block) {
  // Figure out the correct compare values and jump conditions.
  // Handle the first compare/branch as a special case because it might
  // jump to the default case.
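  // For example, with lower_bound == 10 and num_entries == 3 this emits:
  //   cmp value, 10;  jl default;  je case[0]
  //   cmp value, 12;  jl case[1];  je case[2]
  //   jmp default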
  DCHECK_GT(num_entries, 2u);
  Condition first_condition;
  uint32_t index;
  const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
  if (lower_bound != 0) {
    first_condition = kLess;
    __ cmpl(value_reg, Immediate(lower_bound));
    __ j(first_condition, codegen_->GetLabelOf(default_block));
    __ j(kEqual, codegen_->GetLabelOf(successors[0]));

    index = 1;
  } else {
    // Handle all the compare/jumps below.
    first_condition = kBelow;
    index = 0;
  }

  // Handle the rest of the compare/jumps.
  for (; index + 1 < num_entries; index += 2) {
    int32_t compare_to_value = lower_bound + index + 1;
    __ cmpl(value_reg, Immediate(compare_to_value));
    // Jump to successors[index] if value < case_value[index].
    __ j(first_condition, codegen_->GetLabelOf(successors[index]));
    // Jump to successors[index + 1] if value == case_value[index + 1].
    __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
  }

  if (index != num_entries) {
    // There are an odd number of entries. Handle the last one.
    DCHECK_EQ(index + 1, num_entries);
    __ cmpl(value_reg, Immediate(lower_bound + index));
    __ j(kEqual, codegen_->GetLabelOf(successors[index]));
  }

  // And the default for any other value.
  if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
    __ jmp(codegen_->GetLabelOf(default_block));
  }
}

void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
  int32_t lower_bound = switch_instr->GetStartValue();
  uint32_t num_entries = switch_instr->GetNumEntries();
  LocationSummary* locations = switch_instr->GetLocations();
  Register value_reg = locations->InAt(0).AsRegister<Register>();

  GenPackedSwitchWithCompares(value_reg,
                              lower_bound,
                              num_entries,
                              switch_instr->GetBlock(),
                              switch_instr->GetDefaultBlock());
}

void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());

  // Constant area pointer.
  locations->SetInAt(1, Location::RequiresRegister());

  // And the temporary we need.
  locations->AddTemp(Location::RequiresRegister());
}

void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
  int32_t lower_bound = switch_instr->GetStartValue();
  uint32_t num_entries = switch_instr->GetNumEntries();
  LocationSummary* locations = switch_instr->GetLocations();
  Register value_reg = locations->InAt(0).AsRegister<Register>();
  HBasicBlock* default_block = switch_instr->GetDefaultBlock();

  if (num_entries <= kPackedSwitchJumpTableThreshold) {
    GenPackedSwitchWithCompares(value_reg,
                                lower_bound,
                                num_entries,
                                switch_instr->GetBlock(),
                                default_block);
    return;
  }

  // Too many entries for cascaded compares: dispatch through a jump table
  // placed in the constant area.
  Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
  Register constant_area = locations->InAt(1).AsRegister<Register>();

  // Remove the bias, if needed.
  if (lower_bound != 0) {
    __ leal(temp_reg, Address(value_reg, -lower_bound));
    value_reg = temp_reg;
  }

  // Is the value in range?
  DCHECK_GE(num_entries, 1u);
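  // The comparison below is unsigned, so biased values that were negative
  // wrap to large unsigned numbers and also fall through to the default case.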
  __ cmpl(value_reg, Immediate(num_entries - 1));
  __ j(kAbove, codegen_->GetLabelOf(default_block));

  // We are in the range of the table.
  // Load (target-constant_area) from the jump table, indexing by the value.
  __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg));

  // Compute the actual target address by adding in constant_area.
  __ addl(temp_reg, constant_area);

  // And jump.
  __ jmp(temp_reg);
}

void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress(
    HX86ComputeBaseMethodAddress* insn) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorX86::VisitX86ComputeBaseMethodAddress(
    HX86ComputeBaseMethodAddress* insn) {
  LocationSummary* locations = insn->GetLocations();
  Register reg = locations->Out().AsRegister<Register>();

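  // 32-bit x86 has no PC-relative addressing, so the current code address is
  // materialized with a call/pop pair: the return address pushed by the call
  // is exactly the address of the next instruction.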
  // Generate call to next instruction.
  Label next_instruction;
  __ call(&next_instruction);
  __ Bind(&next_instruction);

  // Remember this offset for later use with constant area.
  codegen_->AddMethodAddressOffset(insn, GetAssembler()->CodeSize());

  // Grab the return address off the stack.
  __ popl(reg);
}

void LocationsBuilderX86::VisitX86LoadFromConstantTable(
    HX86LoadFromConstantTable* insn) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);

  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant()));

  // If the value does not need to be materialized, only the inputs need to be set.
  if (insn->IsEmittedAtUseSite()) {
    return;
  }

  switch (insn->GetType()) {
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetOut(Location::RequiresFpuRegister());
      break;

    case DataType::Type::kInt32:
      locations->SetOut(Location::RequiresRegister());
      break;

    default:
      LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
  }
}

void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromConstantTable* insn) {
  if (insn->IsEmittedAtUseSite()) {
    return;
  }

  LocationSummary* locations = insn->GetLocations();
  Location out = locations->Out();
  Register const_area = locations->InAt(0).AsRegister<Register>();
  HConstant* value = insn->GetConstant();

  switch (insn->GetType()) {
    case DataType::Type::kFloat32:
      __ movss(out.AsFpuRegister<XmmRegister>(),
               codegen_->LiteralFloatAddress(
                   value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
      break;

    case DataType::Type::kFloat64:
      __ movsd(out.AsFpuRegister<XmmRegister>(),
               codegen_->LiteralDoubleAddress(
                   value->AsDoubleConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
      break;

    case DataType::Type::kInt32:
      __ movl(out.AsRegister<Register>(),
              codegen_->LiteralInt32Address(
                  value->AsIntConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
      break;

    default:
      LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
  }
}

/**
 * Class to handle late fixup of offsets into constant area.
 */
class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
 public:
  RIPFixup(CodeGeneratorX86& codegen,
           HX86ComputeBaseMethodAddress* base_method_address,
           size_t offset)
      : codegen_(&codegen),
        base_method_address_(base_method_address),
        offset_into_constant_area_(offset) {}

 protected:
  void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }

  CodeGeneratorX86* codegen_;
  HX86ComputeBaseMethodAddress* base_method_address_;

 private:
  void Process(const MemoryRegion& region, int pos) override {
    // Patch the correct offset for the instruction. The place to patch is the
    // last 4 bytes of the instruction.
    // The value to patch is the distance of the offset in the constant area
    // from the address computed by the HX86ComputeBaseMethodAddress instruction.
    int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
    int32_t relative_position =
        constant_offset - codegen_->GetMethodAddressOffset(base_method_address_);

    // Patch in the right value.
    region.StoreUnaligned<int32_t>(pos - 4, relative_position);
  }

  // Location in constant area that the fixup refers to.
  int32_t offset_into_constant_area_;
};

/**
 * Class to handle late fixup of offsets to a jump table that will be created in the
 * constant area.
 */
class JumpTableRIPFixup : public RIPFixup {
 public:
  JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr)
      : RIPFixup(codegen, switch_instr->GetBaseMethodAddress(), static_cast<size_t>(-1)),
        switch_instr_(switch_instr) {}

  void CreateJumpTable() {
    X86Assembler* assembler = codegen_->GetAssembler();

    // Ensure that the reference to the jump table has the correct offset.
    const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
    SetOffset(offset_in_constant_table);

    // The label values in the jump table are computed relative to the
    // instruction addressing the constant area.
    const int32_t relative_offset = codegen_->GetMethodAddressOffset(base_method_address_);

    // Populate the jump table with the correct target offsets.
    int32_t num_entries = switch_instr_->GetNumEntries();
    HBasicBlock* block = switch_instr_->GetBlock();
    const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
    // The value that we want is the target offset - the position of the table.
    for (int32_t i = 0; i < num_entries; i++) {
      HBasicBlock* b = successors[i];
      Label* l = codegen_->GetLabelOf(b);
      DCHECK(l->IsBound());
      int32_t offset_to_block = l->Position() - relative_offset;
      assembler->AppendInt32(offset_to_block);
    }
  }

 private:
  const HX86PackedSwitch* switch_instr_;
};

void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
  // Generate the constant area if needed.
  X86Assembler* assembler = GetAssembler();

  if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
    // Align to 4 byte boundary to reduce cache misses, as the data consists of
    // 4- and 8-byte values.
    assembler->Align(4, 0);
    constant_area_start_ = assembler->CodeSize();

    // Populate any jump tables.
    for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
      jump_table->CreateJumpTable();
    }

    // And now add the constant area to the generated code.
    assembler->AddConstantArea();
  }

  // And finish up.
  CodeGenerator::Finalize(allocator);
}

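// The Literal*Address helpers below return an address whose 32-bit
// displacement is a placeholder; the attached RIPFixup later rewrites those
// four bytes with the distance from the base method address to the value's
// slot in the constant area.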
Address CodeGeneratorX86::LiteralDoubleAddress(double v,
                                               HX86ComputeBaseMethodAddress* method_base,
                                               Register reg) {
  AssemblerFixup* fixup =
      new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddDouble(v));
  return Address(reg, kPlaceholder32BitOffset, fixup);
}

Address CodeGeneratorX86::LiteralFloatAddress(float v,
                                              HX86ComputeBaseMethodAddress* method_base,
                                              Register reg) {
  AssemblerFixup* fixup =
      new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddFloat(v));
  return Address(reg, kPlaceholder32BitOffset, fixup);
}

Address CodeGeneratorX86::LiteralInt32Address(int32_t v,
                                              HX86ComputeBaseMethodAddress* method_base,
                                              Register reg) {
  AssemblerFixup* fixup =
      new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt32(v));
  return Address(reg, kPlaceholder32BitOffset, fixup);
}

Address CodeGeneratorX86::LiteralInt64Address(int64_t v,
                                              HX86ComputeBaseMethodAddress* method_base,
                                              Register reg) {
  AssemblerFixup* fixup =
      new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt64(v));
  return Address(reg, kPlaceholder32BitOffset, fixup);
}

void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) {
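  // Prefer xorl for zero: it is shorter than movl with a zero immediate and
  // breaks any false dependency on the destination register.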
  if (value == 0) {
    __ xorl(dest, dest);
  } else {
    __ movl(dest, Immediate(value));
  }
}

void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
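  // Similarly, testl reg,reg sets the same flags as cmpl reg,$0 with a
  // shorter encoding.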
  if (value == 0) {
    __ testl(dest, dest);
  } else {
    __ cmpl(dest, Immediate(value));
  }
}

void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
  Register lhs_reg = lhs.AsRegister<Register>();
  GenerateIntCompare(lhs_reg, rhs);
}

void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) {
  if (rhs.IsConstant()) {
    int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
    Compare32BitValue(lhs, value);
  } else if (rhs.IsStackSlot()) {
    __ cmpl(lhs, Address(ESP, rhs.GetStackIndex()));
  } else {
    __ cmpl(lhs, rhs.AsRegister<Register>());
  }
}

Address CodeGeneratorX86::ArrayAddress(Register obj,
                                       Location index,
                                       ScaleFactor scale,
                                       uint32_t data_offset) {
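  // A constant index folds into the displacement; otherwise emit a
  // scaled-index address (obj + index * (1 << scale) + data_offset).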
  return index.IsConstant() ?
      Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
      Address(obj, index.AsRegister<Register>(), scale, data_offset);
}

Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
                                           Register reg,
                                           Register value) {
  // Create a fixup to be used to create and address the jump table.
  JumpTableRIPFixup* table_fixup =
      new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);

  // Remember the fixup, so the jump table itself can be populated in Finalize().
  fixups_to_jump_tables_.push_back(table_fixup);

  // We want a scaled address, as we are extracting the correct offset from the table.
  return Address(reg, value, TIMES_4, kPlaceholder32BitOffset, table_fixup);
}

// TODO: target as memory.
void CodeGeneratorX86::MoveFromReturnRegister(Location target, DataType::Type type) {
  if (!target.IsValid()) {
    DCHECK_EQ(type, DataType::Type::kVoid);
    return;
  }

  DCHECK_NE(type, DataType::Type::kVoid);

  Location return_loc = InvokeDexCallingConventionVisitorX86().GetReturnLocation(type);
  if (target.Equals(return_loc)) {
    return;
  }

  // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
  // with the else branch.
  if (type == DataType::Type::kInt64) {
    HParallelMove parallel_move(GetGraph()->GetAllocator());
    parallel_move.AddMove(return_loc.ToLow(), target.ToLow(), DataType::Type::kInt32, nullptr);
    parallel_move.AddMove(return_loc.ToHigh(), target.ToHigh(), DataType::Type::kInt32, nullptr);
    GetMoveResolver()->EmitNativeCode(&parallel_move);
  } else {
    // Let the parallel move resolver take care of all of this.
    HParallelMove parallel_move(GetGraph()->GetAllocator());
    parallel_move.AddMove(return_loc, target, type, nullptr);
    GetMoveResolver()->EmitNativeCode(&parallel_move);
  }
}

void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
                                       const uint8_t* roots_data,
                                       const PatchInfo<Label>& info,
                                       uint64_t index_in_table) const {
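  // The label was bound just past the instruction that loads the root, so
  // backing up by the adjustment points at its 32-bit immediate, which is
  // patched with the absolute address of the entry in the JIT root table.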
  uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
  uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
  using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
  reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
      dchecked_integral_cast<uint32_t>(address);
}

void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
  for (const PatchInfo<Label>& info : jit_string_patches_) {
    StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitStringRootIndex(string_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }

  for (const PatchInfo<Label>& info : jit_class_patches_) {
    TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitClassRootIndex(type_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }
}

void LocationsBuilderX86::VisitIntermediateAddress(HIntermediateAddress* instruction
                                                   ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unreachable";
}

void InstructionCodeGeneratorX86::VisitIntermediateAddress(HIntermediateAddress* instruction
                                                           ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unreachable";
}

bool LocationsBuilderX86::CpuHasAvxFeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX();
}

bool LocationsBuilderX86::CpuHasAvx2FeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX2();
}

bool InstructionCodeGeneratorX86::CpuHasAvxFeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX();
}

bool InstructionCodeGeneratorX86::CpuHasAvx2FeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX2();
}

#undef __

}  // namespace x86
}  // namespace art