1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_x86_64.h"
18
19 #include "art_method.h"
20 #include "code_generator_utils.h"
21 #include "compiled_method.h"
22 #include "entrypoints/quick/quick_entrypoints.h"
23 #include "gc/accounting/card_table.h"
24 #include "intrinsics.h"
25 #include "intrinsics_x86_64.h"
26 #include "mirror/array-inl.h"
27 #include "mirror/class-inl.h"
28 #include "mirror/object_reference.h"
29 #include "thread.h"
30 #include "utils/assembler.h"
31 #include "utils/stack_checks.h"
32 #include "utils/x86_64/assembler_x86_64.h"
33 #include "utils/x86_64/managed_register_x86_64.h"
34
35 namespace art {
36
37 template<class MirrorType>
38 class GcRoot;
39
40 namespace x86_64 {
41
42 static constexpr int kCurrentMethodStackOffset = 0;
43 static constexpr Register kMethodRegisterArgument = RDI;
44 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
45 // table version generates 7 instructions and num_entries literals. The compare/jump sequence
46 // generates less code/data with a small num_entries.
47 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
48
49 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
50 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
51
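// Bit 10 (0x400) of the x87 FPU status word holds the C2 condition flag.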
52 static constexpr int kC2ConditionMask = 0x400;
53
54 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->
55 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, x).Int32Value()
56
57 class NullCheckSlowPathX86_64 : public SlowPathCode {
58 public:
59 explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
60
61 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
62 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
63 __ Bind(GetEntryLabel());
64 if (instruction_->CanThrowIntoCatchBlock()) {
65 // Live registers will be restored in the catch block if caught.
66 SaveLiveRegisters(codegen, instruction_->GetLocations());
67 }
68 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
69 instruction_,
70 instruction_->GetDexPc(),
71 this);
72 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
73 }
74
75 bool IsFatal() const OVERRIDE { return true; }
76
77 const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; }
78
79 private:
80 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
81 };
82
83 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
84 public:
85 explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
86
87 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
88 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
89 __ Bind(GetEntryLabel());
90 if (instruction_->CanThrowIntoCatchBlock()) {
91 // Live registers will be restored in the catch block if caught.
92 SaveLiveRegisters(codegen, instruction_->GetLocations());
93 }
94 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
95 instruction_,
96 instruction_->GetDexPc(),
97 this);
98 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
99 }
100
101 bool IsFatal() const OVERRIDE { return true; }
102
103 const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; }
104
105 private:
106 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
107 };
108
109 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
110 public:
111 DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, Primitive::Type type, bool is_div)
112 : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
113
114 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
115 __ Bind(GetEntryLabel());
116 if (type_ == Primitive::kPrimInt) {
117 if (is_div_) {
118 __ negl(cpu_reg_);
119 } else {
120 __ xorl(cpu_reg_, cpu_reg_);
121 }
122
123 } else {
124 DCHECK_EQ(Primitive::kPrimLong, type_);
125 if (is_div_) {
126 __ negq(cpu_reg_);
127 } else {
128 __ xorl(cpu_reg_, cpu_reg_);
129 }
130 }
131 __ jmp(GetExitLabel());
132 }
133
134 const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86_64"; }
135
136 private:
137 const CpuRegister cpu_reg_;
138 const Primitive::Type type_;
139 const bool is_div_;
140 DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
141 };
142
143 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
144 public:
145 SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
146 : SlowPathCode(instruction), successor_(successor) {}
147
148 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
149 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
150 __ Bind(GetEntryLabel());
151 SaveLiveRegisters(codegen, instruction_->GetLocations());
152 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
153 instruction_,
154 instruction_->GetDexPc(),
155 this);
156 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
157 RestoreLiveRegisters(codegen, instruction_->GetLocations());
158 if (successor_ == nullptr) {
159 __ jmp(GetReturnLabel());
160 } else {
161 __ jmp(x86_64_codegen->GetLabelOf(successor_));
162 }
163 }
164
165 Label* GetReturnLabel() {
166 DCHECK(successor_ == nullptr);
167 return &return_label_;
168 }
169
170 HBasicBlock* GetSuccessor() const {
171 return successor_;
172 }
173
174 const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; }
175
176 private:
177 HBasicBlock* const successor_;
178 Label return_label_;
179
180 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
181 };
182
183 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
184 public:
185 explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
186 : SlowPathCode(instruction) {}
187
188 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
189 LocationSummary* locations = instruction_->GetLocations();
190 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
191 __ Bind(GetEntryLabel());
192 if (instruction_->CanThrowIntoCatchBlock()) {
193 // Live registers will be restored in the catch block if caught.
194 SaveLiveRegisters(codegen, instruction_->GetLocations());
195 }
196 // We're moving two locations to locations that could overlap, so we need a parallel
197 // move resolver.
198 InvokeRuntimeCallingConvention calling_convention;
199 codegen->EmitParallelMoves(
200 locations->InAt(0),
201 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
202 Primitive::kPrimInt,
203 locations->InAt(1),
204 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
205 Primitive::kPrimInt);
206 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
207 instruction_,
208 instruction_->GetDexPc(),
209 this);
210 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
211 }
212
213 bool IsFatal() const OVERRIDE { return true; }
214
215 const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; }
216
217 private:
218 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
219 };
220
221 class LoadClassSlowPathX86_64 : public SlowPathCode {
222 public:
223 LoadClassSlowPathX86_64(HLoadClass* cls,
224 HInstruction* at,
225 uint32_t dex_pc,
226 bool do_clinit)
227 : SlowPathCode(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
228 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
229 }
230
231 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
232 LocationSummary* locations = at_->GetLocations();
233 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
234 __ Bind(GetEntryLabel());
235
236 SaveLiveRegisters(codegen, locations);
237
238 InvokeRuntimeCallingConvention calling_convention;
239 __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex()));
240 x86_64_codegen->InvokeRuntime(do_clinit_ ?
241 QUICK_ENTRY_POINT(pInitializeStaticStorage) :
242 QUICK_ENTRY_POINT(pInitializeType),
243 at_,
244 dex_pc_,
245 this);
246 if (do_clinit_) {
247 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
248 } else {
249 CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
250 }
251
252 Location out = locations->Out();
253 // Move the class to the desired location.
254 if (out.IsValid()) {
255 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
256 x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
257 }
258
259 RestoreLiveRegisters(codegen, locations);
260 __ jmp(GetExitLabel());
261 }
262
263 const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86_64"; }
264
265 private:
266 // The class this slow path will load.
267 HLoadClass* const cls_;
268
269 // The instruction where this slow path is happening.
270 // (Might be the load class or an initialization check).
271 HInstruction* const at_;
272
273 // The dex PC of `at_`.
274 const uint32_t dex_pc_;
275
276 // Whether to initialize the class.
277 const bool do_clinit_;
278
279 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
280 };
281
282 class LoadStringSlowPathX86_64 : public SlowPathCode {
283 public:
284 explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
285
286 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
287 LocationSummary* locations = instruction_->GetLocations();
288 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
289
290 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
291 __ Bind(GetEntryLabel());
292 SaveLiveRegisters(codegen, locations);
293
294 InvokeRuntimeCallingConvention calling_convention;
295 const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
296 __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(string_index));
297 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
298 instruction_,
299 instruction_->GetDexPc(),
300 this);
301 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
302 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
303 RestoreLiveRegisters(codegen, locations);
304 __ jmp(GetExitLabel());
305 }
306
307 const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; }
308
309 private:
310 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
311 };
312
313 class TypeCheckSlowPathX86_64 : public SlowPathCode {
314 public:
315 TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
316 : SlowPathCode(instruction), is_fatal_(is_fatal) {}
317
318 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
319 LocationSummary* locations = instruction_->GetLocations();
320 Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
321 : locations->Out();
322 uint32_t dex_pc = instruction_->GetDexPc();
323 DCHECK(instruction_->IsCheckCast()
324 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
325
326 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
327 __ Bind(GetEntryLabel());
328
329 if (!is_fatal_) {
330 SaveLiveRegisters(codegen, locations);
331 }
332
333 // We're moving two locations to locations that could overlap, so we need a parallel
334 // move resolver.
335 InvokeRuntimeCallingConvention calling_convention;
336 codegen->EmitParallelMoves(
337 locations->InAt(1),
338 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
339 Primitive::kPrimNot,
340 object_class,
341 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
342 Primitive::kPrimNot);
343
344 if (instruction_->IsInstanceOf()) {
345 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
346 instruction_,
347 dex_pc,
348 this);
349 CheckEntrypointTypes<
350 kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
351 } else {
352 DCHECK(instruction_->IsCheckCast());
353 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
354 instruction_,
355 dex_pc,
356 this);
357 CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
358 }
359
360 if (!is_fatal_) {
361 if (instruction_->IsInstanceOf()) {
362 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
363 }
364
365 RestoreLiveRegisters(codegen, locations);
366 __ jmp(GetExitLabel());
367 }
368 }
369
370 const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86_64"; }
371
372 bool IsFatal() const OVERRIDE { return is_fatal_; }
373
374 private:
375 const bool is_fatal_;
376
377 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
378 };
379
380 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
381 public:
382 explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
383 : SlowPathCode(instruction) {}
384
385 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
386 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
387 __ Bind(GetEntryLabel());
388 SaveLiveRegisters(codegen, instruction_->GetLocations());
389 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
390 instruction_,
391 instruction_->GetDexPc(),
392 this);
393 CheckEntrypointTypes<kQuickDeoptimize, void, void>();
394 }
395
396 const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
397
398 private:
399 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
400 };
401
402 class ArraySetSlowPathX86_64 : public SlowPathCode {
403 public:
404 explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
405
406 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
407 LocationSummary* locations = instruction_->GetLocations();
408 __ Bind(GetEntryLabel());
409 SaveLiveRegisters(codegen, locations);
410
411 InvokeRuntimeCallingConvention calling_convention;
412 HParallelMove parallel_move(codegen->GetGraph()->GetArena());
413 parallel_move.AddMove(
414 locations->InAt(0),
415 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
416 Primitive::kPrimNot,
417 nullptr);
418 parallel_move.AddMove(
419 locations->InAt(1),
420 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
421 Primitive::kPrimInt,
422 nullptr);
423 parallel_move.AddMove(
424 locations->InAt(2),
425 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
426 Primitive::kPrimNot,
427 nullptr);
428 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
429
430 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
431 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
432 instruction_,
433 instruction_->GetDexPc(),
434 this);
435 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
436 RestoreLiveRegisters(codegen, locations);
437 __ jmp(GetExitLabel());
438 }
439
440 const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; }
441
442 private:
443 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
444 };
445
446 // Slow path marking an object during a read barrier.
447 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
448 public:
449 ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location out, Location obj)
450 : SlowPathCode(instruction), out_(out), obj_(obj) {
451 DCHECK(kEmitCompilerReadBarrier);
452 }
453
454 const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; }
455
456 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
457 LocationSummary* locations = instruction_->GetLocations();
458 Register reg_out = out_.AsRegister<Register>();
459 DCHECK(locations->CanCall());
460 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
461 DCHECK(instruction_->IsInstanceFieldGet() ||
462 instruction_->IsStaticFieldGet() ||
463 instruction_->IsArrayGet() ||
464 instruction_->IsLoadClass() ||
465 instruction_->IsLoadString() ||
466 instruction_->IsInstanceOf() ||
467 instruction_->IsCheckCast())
468 << "Unexpected instruction in read barrier marking slow path: "
469 << instruction_->DebugName();
470
471 __ Bind(GetEntryLabel());
472 SaveLiveRegisters(codegen, locations);
473
474 InvokeRuntimeCallingConvention calling_convention;
475 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
476 x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
477 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
478 instruction_,
479 instruction_->GetDexPc(),
480 this);
481 CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
482 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
483
484 RestoreLiveRegisters(codegen, locations);
485 __ jmp(GetExitLabel());
486 }
487
488 private:
489 const Location out_;
490 const Location obj_;
491
492 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
493 };
494
495 // Slow path generating a read barrier for a heap reference.
496 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
497 public:
498 ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
499 Location out,
500 Location ref,
501 Location obj,
502 uint32_t offset,
503 Location index)
504 : SlowPathCode(instruction),
505 out_(out),
506 ref_(ref),
507 obj_(obj),
508 offset_(offset),
509 index_(index) {
510 DCHECK(kEmitCompilerReadBarrier);
511 // If `obj` is equal to `out` or `ref`, it means the initial
512 // object has been overwritten by (or after) the heap object
513 // reference load to be instrumented, e.g.:
514 //
515 // __ movl(out, Address(out, offset));
516 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
517 //
518 // In that case, we have lost the information about the original
519 // object, and the emitted read barrier cannot work properly.
520 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
521 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
522 }
523
524 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
525 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
526 LocationSummary* locations = instruction_->GetLocations();
527 CpuRegister reg_out = out_.AsRegister<CpuRegister>();
528 DCHECK(locations->CanCall());
529 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
530 DCHECK(!instruction_->IsInvoke() ||
531 (instruction_->IsInvokeStaticOrDirect() &&
532 instruction_->GetLocations()->Intrinsified()))
533 << "Unexpected instruction in read barrier for heap reference slow path: "
534 << instruction_->DebugName();
535
536 __ Bind(GetEntryLabel());
537 SaveLiveRegisters(codegen, locations);
538
539 // We may have to change the index's value, but as `index_` is a
540 // constant member (like other "inputs" of this slow path),
541 // introduce a copy of it, `index`.
542 Location index = index_;
543 if (index_.IsValid()) {
544 // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
545 if (instruction_->IsArrayGet()) {
546 // Compute real offset and store it in index_.
547 Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
548 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
549 if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
550 // We are about to change the value of `index_reg` (see the
551 // calls to art::x86_64::X86_64Assembler::shll and
552 // art::x86_64::X86_64Assembler::AddImmediate below), but it
553 // has not been saved by the previous call to
554 // art::SlowPathCode::SaveLiveRegisters, as it is a
555 // callee-save register --
556 // art::SlowPathCode::SaveLiveRegisters does not consider
557 // callee-save registers, as it has been designed with the
558 // assumption that callee-save registers are supposed to be
559 // handled by the called function. So, as a callee-save
560 // register, `index_reg` _would_ eventually be saved onto
561 // the stack, but it would be too late: we would have
562 // changed its value earlier. Therefore, we manually save
563 // it here into another freely available register,
564 // `free_reg`, chosen of course among the caller-save
565 // registers (as a callee-save `free_reg` register would
566 // exhibit the same problem).
567 //
568 // Note we could have requested a temporary register from
569 // the register allocator instead; but we prefer not to, as
570 // this is a slow path, and we know we can find a
571 // caller-save register that is available.
572 Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
573 __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
574 index_reg = free_reg;
575 index = Location::RegisterLocation(index_reg);
576 } else {
577 // The initial register stored in `index_` has already been
578 // saved in the call to art::SlowPathCode::SaveLiveRegisters
579 // (as it is not a callee-save register), so we can freely
580 // use it.
581 }
582 // Shifting the index value contained in `index_reg` by the
583 // scale factor (2) cannot overflow in practice, as the
584 // runtime is unable to allocate object arrays with a size
585 // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
586 __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
587 static_assert(
588 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
589 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
590 __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
591 } else {
592 DCHECK(instruction_->IsInvoke());
593 DCHECK(instruction_->GetLocations()->Intrinsified());
594 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
595 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
596 << instruction_->AsInvoke()->GetIntrinsic();
597 DCHECK_EQ(offset_, 0U);
598 DCHECK(index_.IsRegister());
599 }
600 }
601
602 // We're moving two or three locations to locations that could
603 // overlap, so we need a parallel move resolver.
604 InvokeRuntimeCallingConvention calling_convention;
605 HParallelMove parallel_move(codegen->GetGraph()->GetArena());
606 parallel_move.AddMove(ref_,
607 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
608 Primitive::kPrimNot,
609 nullptr);
610 parallel_move.AddMove(obj_,
611 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
612 Primitive::kPrimNot,
613 nullptr);
614 if (index.IsValid()) {
615 parallel_move.AddMove(index,
616 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
617 Primitive::kPrimInt,
618 nullptr);
619 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
620 } else {
621 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
622 __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
623 }
624 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
625 instruction_,
626 instruction_->GetDexPc(),
627 this);
628 CheckEntrypointTypes<
629 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
630 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
631
632 RestoreLiveRegisters(codegen, locations);
633 __ jmp(GetExitLabel());
634 }
635
636 const char* GetDescription() const OVERRIDE {
637 return "ReadBarrierForHeapReferenceSlowPathX86_64";
638 }
639
640 private:
641 CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
642 size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
643 size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
644 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
645 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
646 return static_cast<CpuRegister>(i);
647 }
648 }
649 // We shall never fail to find a free caller-save register, as
650 // there are more than two core caller-save registers on x86-64
651 // (meaning it is possible to find one which is different from
652 // `ref` and `obj`).
653 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
654 LOG(FATAL) << "Could not find a free caller-save register";
655 UNREACHABLE();
656 }
657
658 const Location out_;
659 const Location ref_;
660 const Location obj_;
661 const uint32_t offset_;
662 // An additional location containing an index to an array.
663 // Only used for HArrayGet and the UnsafeGetObject &
664 // UnsafeGetObjectVolatile intrinsics.
665 const Location index_;
666
667 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
668 };
669
670 // Slow path generating a read barrier for a GC root.
671 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
672 public:
673 ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
674 : SlowPathCode(instruction), out_(out), root_(root) {
675 DCHECK(kEmitCompilerReadBarrier);
676 }
677
678 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
679 LocationSummary* locations = instruction_->GetLocations();
680 DCHECK(locations->CanCall());
681 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
682 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
683 << "Unexpected instruction in read barrier for GC root slow path: "
684 << instruction_->DebugName();
685
686 __ Bind(GetEntryLabel());
687 SaveLiveRegisters(codegen, locations);
688
689 InvokeRuntimeCallingConvention calling_convention;
690 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
691 x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
692 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
693 instruction_,
694 instruction_->GetDexPc(),
695 this);
696 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
697 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
698
699 RestoreLiveRegisters(codegen, locations);
700 __ jmp(GetExitLabel());
701 }
702
703 const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; }
704
705 private:
706 const Location out_;
707 const Location root_;
708
709 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
710 };
711
712 #undef __
713 #define __ down_cast<X86_64Assembler*>(GetAssembler())->
714
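// Maps integer condition to x86_64 name.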
715 inline Condition X86_64IntegerCondition(IfCondition cond) {
716 switch (cond) {
717 case kCondEQ: return kEqual;
718 case kCondNE: return kNotEqual;
719 case kCondLT: return kLess;
720 case kCondLE: return kLessEqual;
721 case kCondGT: return kGreater;
722 case kCondGE: return kGreaterEqual;
723 case kCondB: return kBelow;
724 case kCondBE: return kBelowEqual;
725 case kCondA: return kAbove;
726 case kCondAE: return kAboveEqual;
727 }
728 LOG(FATAL) << "Unreachable";
729 UNREACHABLE();
730 }
731
732 // Maps FP condition to x86_64 name.
733 inline Condition X86_64FPCondition(IfCondition cond) {
734 switch (cond) {
735 case kCondEQ: return kEqual;
736 case kCondNE: return kNotEqual;
737 case kCondLT: return kBelow;
738 case kCondLE: return kBelowEqual;
739 case kCondGT: return kAbove;
740 case kCondGE: return kAboveEqual;
741 default: break; // should not happen
742 };
743 LOG(FATAL) << "Unreachable";
744 UNREACHABLE();
745 }
746
747 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
748 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
749 MethodReference target_method ATTRIBUTE_UNUSED) {
750 switch (desired_dispatch_info.code_ptr_location) {
751 case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
752 case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
753 // For direct code, we actually prefer to call via the code pointer from ArtMethod*.
754 return HInvokeStaticOrDirect::DispatchInfo {
755 desired_dispatch_info.method_load_kind,
756 HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
757 desired_dispatch_info.method_load_data,
758 0u
759 };
760 default:
761 return desired_dispatch_info;
762 }
763 }
764
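// Generates a call to a static or direct method: materializes the callee ArtMethod* (or direct
// code address) according to the method load kind, then emits the call per the code pointer
// location.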
765 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
766 Location temp) {
767 // All registers are assumed to be correctly set up.
768
769 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
770 switch (invoke->GetMethodLoadKind()) {
771 case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
772 // temp = thread->string_init_entrypoint
773 __ gs()->movq(temp.AsRegister<CpuRegister>(),
774 Address::Absolute(invoke->GetStringInitOffset(), /* no_rip */ true));
775 break;
776 case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
777 callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
778 break;
779 case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
780 __ movq(temp.AsRegister<CpuRegister>(), Immediate(invoke->GetMethodAddress()));
781 break;
782 case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
783 __ movl(temp.AsRegister<CpuRegister>(), Immediate(0)); // Placeholder.
784 method_patches_.emplace_back(invoke->GetTargetMethod());
785 __ Bind(&method_patches_.back().label); // Bind the label at the end of the "movl" insn.
786 break;
787 case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
788 __ movq(temp.AsRegister<CpuRegister>(),
789 Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
790 // Bind a new fixup label at the end of the "movq" insn.
791 uint32_t offset = invoke->GetDexCacheArrayOffset();
792 __ Bind(NewPcRelativeDexCacheArrayPatch(*invoke->GetTargetMethod().dex_file, offset));
793 break;
794 }
795 case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
796 Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
797 Register method_reg;
798 CpuRegister reg = temp.AsRegister<CpuRegister>();
799 if (current_method.IsRegister()) {
800 method_reg = current_method.AsRegister<Register>();
801 } else {
802 DCHECK(invoke->GetLocations()->Intrinsified());
803 DCHECK(!current_method.IsValid());
804 method_reg = reg.AsRegister();
805 __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
806 }
807 // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
808 __ movq(reg,
809 Address(CpuRegister(method_reg),
810 ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue()));
811 // temp = temp[index_in_cache];
812 // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
813 uint32_t index_in_cache = invoke->GetDexMethodIndex();
814 __ movq(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache)));
815 break;
816 }
817 }
818
819 switch (invoke->GetCodePtrLocation()) {
820 case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
821 __ call(&frame_entry_label_);
822 break;
823 case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: {
824 relative_call_patches_.emplace_back(invoke->GetTargetMethod());
825 Label* label = &relative_call_patches_.back().label;
826 __ call(label); // Bind to the patch label, override at link time.
827 __ Bind(label); // Bind the label at the end of the "call" insn.
828 break;
829 }
830 case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
831 case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
832 // Filtered out by GetSupportedInvokeStaticOrDirectDispatch().
833 LOG(FATAL) << "Unsupported";
834 UNREACHABLE();
835 case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
836 // (callee_method + offset_of_quick_compiled_code)()
837 __ call(Address(callee_method.AsRegister<CpuRegister>(),
838 ArtMethod::EntryPointFromQuickCompiledCodeOffset(
839 kX86_64WordSize).SizeValue()));
840 break;
841 }
842
843 DCHECK(!IsLeafMethod());
844 }
845
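// Generates a virtual call: loads the receiver's class, fetches the ArtMethod* from the class'
// embedded vtable at the method's vtable index, and calls its quick-compiled entry point.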
846 void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
847 CpuRegister temp = temp_in.AsRegister<CpuRegister>();
848 size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
849 invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
850
851 // Use the calling convention instead of the location of the receiver, as
852 // intrinsics may have put the receiver in a different register. In the intrinsics
853 // slow path, the arguments have been moved to the right place, so here we are
854 // guaranteed that the receiver is the first register of the calling convention.
855 InvokeDexCallingConvention calling_convention;
856 Register receiver = calling_convention.GetRegisterAt(0);
857
858 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
859 // /* HeapReference<Class> */ temp = receiver->klass_
860 __ movl(temp, Address(CpuRegister(receiver), class_offset));
861 MaybeRecordImplicitNullCheck(invoke);
862 // Instead of simply (possibly) unpoisoning `temp` here, we should
863 // emit a read barrier for the previous class reference load.
864 // However this is not required in practice, as this is an
865 // intermediate/temporary reference and because the current
866 // concurrent copying collector keeps the from-space memory
867 // intact/accessible until the end of the marking phase (the
868 // concurrent copying collector may not do so in the future).
869 __ MaybeUnpoisonHeapReference(temp);
870 // temp = temp->GetMethodAt(method_offset);
871 __ movq(temp, Address(temp, method_offset));
872 // call temp->GetEntryPoint();
873 __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
874 kX86_64WordSize).SizeValue()));
875 }
876
877 void CodeGeneratorX86_64::RecordSimplePatch() {
878 if (GetCompilerOptions().GetIncludePatchInformation()) {
879 simple_patches_.emplace_back();
880 __ Bind(&simple_patches_.back());
881 }
882 }
883
884 void CodeGeneratorX86_64::RecordStringPatch(HLoadString* load_string) {
885 string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
886 __ Bind(&string_patches_.back().label);
887 }
888
889 Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
890 uint32_t element_offset) {
891 // Add a patch entry and return the label.
892 pc_relative_dex_cache_patches_.emplace_back(dex_file, element_offset);
893 return &pc_relative_dex_cache_patches_.back().label;
894 }
895
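// Collects the patches recorded during code generation (method, relative call, dex cache array,
// simple and string patches) so the linker can resolve them.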
896 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
897 DCHECK(linker_patches->empty());
898 size_t size =
899 method_patches_.size() +
900 relative_call_patches_.size() +
901 pc_relative_dex_cache_patches_.size() +
902 simple_patches_.size() +
903 string_patches_.size();
904 linker_patches->reserve(size);
905 // The label points to the end of the "movl" insn but the literal offset for method
906 // patch needs to point to the embedded constant which occupies the last 4 bytes.
907 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
908 for (const MethodPatchInfo<Label>& info : method_patches_) {
909 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
910 linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
911 info.target_method.dex_file,
912 info.target_method.dex_method_index));
913 }
914 for (const MethodPatchInfo<Label>& info : relative_call_patches_) {
915 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
916 linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset,
917 info.target_method.dex_file,
918 info.target_method.dex_method_index));
919 }
920 for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) {
921 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
922 linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset,
923 &info.target_dex_file,
924 info.label.Position(),
925 info.element_offset));
926 }
927 for (const Label& label : simple_patches_) {
928 uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment;
929 linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
930 }
931 for (const StringPatchInfo<Label>& info : string_patches_) {
932 // These are always PC-relative, see GetSupportedLoadStringKind().
933 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
934 linker_patches->push_back(LinkerPatch::RelativeStringPatch(literal_offset,
935 &info.dex_file,
936 info.label.Position(),
937 info.string_index));
938 }
939 }
940
941 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
942 stream << Register(reg);
943 }
944
945 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
946 stream << FloatRegister(reg);
947 }
948
949 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
950 __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
951 return kX86_64WordSize;
952 }
953
954 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
955 __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
956 return kX86_64WordSize;
957 }
958
959 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
960 __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
961 return kX86_64WordSize;
962 }
963
964 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
965 __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
966 return kX86_64WordSize;
967 }
968
969 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
970 HInstruction* instruction,
971 uint32_t dex_pc,
972 SlowPathCode* slow_path) {
973 InvokeRuntime(GetThreadOffset<kX86_64WordSize>(entrypoint).Int32Value(),
974 instruction,
975 dex_pc,
976 slow_path);
977 }
978
979 void CodeGeneratorX86_64::InvokeRuntime(int32_t entry_point_offset,
980 HInstruction* instruction,
981 uint32_t dex_pc,
982 SlowPathCode* slow_path) {
983 ValidateInvokeRuntime(instruction, slow_path);
984 __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
985 RecordPcInfo(instruction, dex_pc, slow_path);
986 }
987
988 static constexpr int kNumberOfCpuRegisterPairs = 0;
989 // Use a fake return address register to mimic Quick.
990 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
991 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
992 const X86_64InstructionSetFeatures& isa_features,
993 const CompilerOptions& compiler_options,
994 OptimizingCompilerStats* stats)
995 : CodeGenerator(graph,
996 kNumberOfCpuRegisters,
997 kNumberOfFloatRegisters,
998 kNumberOfCpuRegisterPairs,
999 ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1000 arraysize(kCoreCalleeSaves))
1001 | (1 << kFakeReturnRegister),
1002 ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
1003 arraysize(kFpuCalleeSaves)),
1004 compiler_options,
1005 stats),
1006 block_labels_(nullptr),
1007 location_builder_(graph, this),
1008 instruction_visitor_(graph, this),
1009 move_resolver_(graph->GetArena(), this),
1010 assembler_(graph->GetArena()),
1011 isa_features_(isa_features),
1012 constant_area_start_(0),
1013 method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1014 relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1015 pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1016 simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1017 string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1018 fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
1019 AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1020 }
1021
1022 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1023 CodeGeneratorX86_64* codegen)
1024 : InstructionCodeGenerator(graph, codegen),
1025 assembler_(codegen->GetAssembler()),
1026 codegen_(codegen) {}
1027
1028 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1029 // Stack register is always reserved.
1030 blocked_core_registers_[RSP] = true;
1031
1032 // Block the register used as TMP.
1033 blocked_core_registers_[TMP] = true;
1034 }
1035
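// Maps ART registers to DWARF register numbers for CFI.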
1036 static dwarf::Reg DWARFReg(Register reg) {
1037 return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1038 }
1039
1040 static dwarf::Reg DWARFReg(FloatRegister reg) {
1041 return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1042 }
1043
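// Method prologue: stack overflow check (unless skipped for leaf methods), callee-save core
// register pushes, frame allocation, callee-save XMM spills, and storing the current ArtMethod*
// at [RSP].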
1044 void CodeGeneratorX86_64::GenerateFrameEntry() {
1045 __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address
1046 __ Bind(&frame_entry_label_);
1047 bool skip_overflow_check = IsLeafMethod()
1048 && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1049 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1050
1051 if (!skip_overflow_check) {
1052 __ testq(CpuRegister(RAX), Address(
1053 CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
1054 RecordPcInfo(nullptr, 0);
1055 }
1056
1057 if (HasEmptyFrame()) {
1058 return;
1059 }
1060
1061 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1062 Register reg = kCoreCalleeSaves[i];
1063 if (allocated_registers_.ContainsCoreRegister(reg)) {
1064 __ pushq(CpuRegister(reg));
1065 __ cfi().AdjustCFAOffset(kX86_64WordSize);
1066 __ cfi().RelOffset(DWARFReg(reg), 0);
1067 }
1068 }
1069
1070 int adjust = GetFrameSize() - GetCoreSpillSize();
1071 __ subq(CpuRegister(RSP), Immediate(adjust));
1072 __ cfi().AdjustCFAOffset(adjust);
1073 uint32_t xmm_spill_location = GetFpuSpillStart();
1074 size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
1075
1076 for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1077 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1078 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1079 __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1080 __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1081 }
1082 }
1083
1084 __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1085 CpuRegister(kMethodRegisterArgument));
1086 }
1087
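// Method epilogue: restores callee-save XMM and core registers, releases the frame, and returns.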
1088 void CodeGeneratorX86_64::GenerateFrameExit() {
1089 __ cfi().RememberState();
1090 if (!HasEmptyFrame()) {
1091 uint32_t xmm_spill_location = GetFpuSpillStart();
1092 size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
1093 for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1094 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1095 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1096 __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1097 __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1098 }
1099 }
1100
1101 int adjust = GetFrameSize() - GetCoreSpillSize();
1102 __ addq(CpuRegister(RSP), Immediate(adjust));
1103 __ cfi().AdjustCFAOffset(-adjust);
1104
1105 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1106 Register reg = kCoreCalleeSaves[i];
1107 if (allocated_registers_.ContainsCoreRegister(reg)) {
1108 __ popq(CpuRegister(reg));
1109 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1110 __ cfi().Restore(DWARFReg(reg));
1111 }
1112 }
1113 }
1114 __ ret();
1115 __ cfi().RestoreState();
1116 __ cfi().DefCFAOffset(GetFrameSize());
1117 }
1118
1119 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1120 __ Bind(GetLabelOf(block));
1121 }
1122
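// Moves a value between arbitrary locations (register, FP register, stack slot, or constant),
// using TMP for stack-to-stack copies.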
1123 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1124 if (source.Equals(destination)) {
1125 return;
1126 }
1127 if (destination.IsRegister()) {
1128 CpuRegister dest = destination.AsRegister<CpuRegister>();
1129 if (source.IsRegister()) {
1130 __ movq(dest, source.AsRegister<CpuRegister>());
1131 } else if (source.IsFpuRegister()) {
1132 __ movd(dest, source.AsFpuRegister<XmmRegister>());
1133 } else if (source.IsStackSlot()) {
1134 __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1135 } else if (source.IsConstant()) {
1136 HConstant* constant = source.GetConstant();
1137 if (constant->IsLongConstant()) {
1138 Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1139 } else {
1140 Load32BitValue(dest, GetInt32ValueOf(constant));
1141 }
1142 } else {
1143 DCHECK(source.IsDoubleStackSlot());
1144 __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1145 }
1146 } else if (destination.IsFpuRegister()) {
1147 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1148 if (source.IsRegister()) {
1149 __ movd(dest, source.AsRegister<CpuRegister>());
1150 } else if (source.IsFpuRegister()) {
1151 __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1152 } else if (source.IsConstant()) {
1153 HConstant* constant = source.GetConstant();
1154 int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1155 if (constant->IsFloatConstant()) {
1156 Load32BitValue(dest, static_cast<int32_t>(value));
1157 } else {
1158 Load64BitValue(dest, value);
1159 }
1160 } else if (source.IsStackSlot()) {
1161 __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1162 } else {
1163 DCHECK(source.IsDoubleStackSlot());
1164 __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1165 }
1166 } else if (destination.IsStackSlot()) {
1167 if (source.IsRegister()) {
1168 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
1169 source.AsRegister<CpuRegister>());
1170 } else if (source.IsFpuRegister()) {
1171 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
1172 source.AsFpuRegister<XmmRegister>());
1173 } else if (source.IsConstant()) {
1174 HConstant* constant = source.GetConstant();
1175 int32_t value = GetInt32ValueOf(constant);
1176 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
1177 } else {
1178 DCHECK(source.IsStackSlot()) << source;
1179 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1180 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1181 }
1182 } else {
1183 DCHECK(destination.IsDoubleStackSlot());
1184 if (source.IsRegister()) {
1185 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
1186 source.AsRegister<CpuRegister>());
1187 } else if (source.IsFpuRegister()) {
1188 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
1189 source.AsFpuRegister<XmmRegister>());
1190 } else if (source.IsConstant()) {
1191 HConstant* constant = source.GetConstant();
1192 int64_t value;
1193 if (constant->IsDoubleConstant()) {
1194 value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
1195 } else {
1196 DCHECK(constant->IsLongConstant());
1197 value = constant->AsLongConstant()->GetValue();
1198 }
1199 Store64BitValueToStack(destination, value);
1200 } else {
1201 DCHECK(source.IsDoubleStackSlot());
1202 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1203 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1204 }
1205 }
1206 }
1207
1208 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
1209 DCHECK(location.IsRegister());
1210 Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
1211 }
1212
1213 void CodeGeneratorX86_64::MoveLocation(
1214 Location dst, Location src, Primitive::Type dst_type ATTRIBUTE_UNUSED) {
1215 Move(dst, src);
1216 }
1217
1218 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1219 if (location.IsRegister()) {
1220 locations->AddTemp(location);
1221 } else {
1222 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1223 }
1224 }
1225
1226 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1227 DCHECK(!successor->IsExitBlock());
1228
1229 HBasicBlock* block = got->GetBlock();
1230 HInstruction* previous = got->GetPrevious();
1231
1232 HLoopInformation* info = block->GetLoopInformation();
1233 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1234 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1235 return;
1236 }
1237
1238 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1239 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1240 }
1241 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1242 __ jmp(codegen_->GetLabelOf(successor));
1243 }
1244 }
1245
1246 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
1247 got->SetLocations(nullptr);
1248 }
1249
1250 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
1251 HandleGoto(got, got->GetSuccessor());
1252 }
1253
1254 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1255 try_boundary->SetLocations(nullptr);
1256 }
1257
1258 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1259 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1260 if (!successor->IsExitBlock()) {
1261 HandleGoto(try_boundary, successor);
1262 }
1263 }
1264
1265 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
1266 exit->SetLocations(nullptr);
1267 }
1268
1269 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1270 }
1271
1272 template<class LabelType>
1273 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
1274 LabelType* true_label,
1275 LabelType* false_label) {
1276 if (cond->IsFPConditionTrueIfNaN()) {
1277 __ j(kUnordered, true_label);
1278 } else if (cond->IsFPConditionFalseIfNaN()) {
1279 __ j(kUnordered, false_label);
1280 }
1281 __ j(X86_64FPCondition(cond->GetCondition()), true_label);
1282 }
1283
1284 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
1285 LocationSummary* locations = condition->GetLocations();
1286
1287 Location left = locations->InAt(0);
1288 Location right = locations->InAt(1);
1289 Primitive::Type type = condition->InputAt(0)->GetType();
1290 switch (type) {
1291 case Primitive::kPrimBoolean:
1292 case Primitive::kPrimByte:
1293 case Primitive::kPrimChar:
1294 case Primitive::kPrimShort:
1295 case Primitive::kPrimInt:
1296 case Primitive::kPrimNot: {
1297 CpuRegister left_reg = left.AsRegister<CpuRegister>();
1298 if (right.IsConstant()) {
1299 int32_t value = CodeGenerator::GetInt32ValueOf(right.GetConstant());
1300 if (value == 0) {
1301 __ testl(left_reg, left_reg);
1302 } else {
1303 __ cmpl(left_reg, Immediate(value));
1304 }
1305 } else if (right.IsStackSlot()) {
1306 __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1307 } else {
1308 __ cmpl(left_reg, right.AsRegister<CpuRegister>());
1309 }
1310 break;
1311 }
1312 case Primitive::kPrimLong: {
1313 CpuRegister left_reg = left.AsRegister<CpuRegister>();
1314 if (right.IsConstant()) {
1315 int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1316 codegen_->Compare64BitValue(left_reg, value);
1317 } else if (right.IsDoubleStackSlot()) {
1318 __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1319 } else {
1320 __ cmpq(left_reg, right.AsRegister<CpuRegister>());
1321 }
1322 break;
1323 }
1324 case Primitive::kPrimFloat: {
1325 if (right.IsFpuRegister()) {
1326 __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1327 } else if (right.IsConstant()) {
1328 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1329 codegen_->LiteralFloatAddress(
1330 right.GetConstant()->AsFloatConstant()->GetValue()));
1331 } else {
1332 DCHECK(right.IsStackSlot());
1333 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1334 Address(CpuRegister(RSP), right.GetStackIndex()));
1335 }
1336 break;
1337 }
1338 case Primitive::kPrimDouble: {
1339 if (right.IsFpuRegister()) {
1340 __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1341 } else if (right.IsConstant()) {
1342 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1343 codegen_->LiteralDoubleAddress(
1344 right.GetConstant()->AsDoubleConstant()->GetValue()));
1345 } else {
1346 DCHECK(right.IsDoubleStackSlot());
1347 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1348 Address(CpuRegister(RSP), right.GetStackIndex()));
1349 }
1350 break;
1351 }
1352 default:
1353 LOG(FATAL) << "Unexpected condition type " << type;
1354 }
1355 }
1356
1357 template<class LabelType>
1358 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
1359 LabelType* true_target_in,
1360 LabelType* false_target_in) {
1361 // Generated branching requires both targets to be explicit. If either of the
1362 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1363 LabelType fallthrough_target;
1364 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1365 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1366
1367 // Generate the comparison to set the CC.
1368 GenerateCompareTest(condition);
1369
1370 // Now generate the correct jump(s).
1371 Primitive::Type type = condition->InputAt(0)->GetType();
1372 switch (type) {
1373 case Primitive::kPrimLong: {
1374 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1375 break;
1376 }
1377 case Primitive::kPrimFloat: {
1378 GenerateFPJumps(condition, true_target, false_target);
1379 break;
1380 }
1381 case Primitive::kPrimDouble: {
1382 GenerateFPJumps(condition, true_target, false_target);
1383 break;
1384 }
1385 default:
1386 LOG(FATAL) << "Unexpected condition type " << type;
1387 }
1388
1389 if (false_target != &fallthrough_target) {
1390 __ jmp(false_target);
1391 }
1392
1393 if (fallthrough_target.IsLinked()) {
1394 __ Bind(&fallthrough_target);
1395 }
1396 }
1397
1398 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1399 // Moves may affect the eflags register (move zero uses xorl), so the eflags set
1400 // by `cond` are only reliable if `cond` immediately precedes `branch`. Materialized
1401 // floating-point conditions cannot reuse the eflags due to their more complex branching.
1402 return cond->IsCondition() &&
1403 cond->GetNext() == branch &&
1404 !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType());
1405 }
1406
1407 template<class LabelType>
1408 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
1409 size_t condition_input_index,
1410 LabelType* true_target,
1411 LabelType* false_target) {
1412 HInstruction* cond = instruction->InputAt(condition_input_index);
1413
1414 if (true_target == nullptr && false_target == nullptr) {
1415 // Nothing to do. The code always falls through.
1416 return;
1417 } else if (cond->IsIntConstant()) {
1418 // Constant condition, statically compared against "true" (integer value 1).
1419 if (cond->AsIntConstant()->IsTrue()) {
1420 if (true_target != nullptr) {
1421 __ jmp(true_target);
1422 }
1423 } else {
1424 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1425 if (false_target != nullptr) {
1426 __ jmp(false_target);
1427 }
1428 }
1429 return;
1430 }
1431
1432 // The following code generates these patterns:
1433 // (1) true_target == nullptr && false_target != nullptr
1434 // - opposite condition true => branch to false_target
1435 // (2) true_target != nullptr && false_target == nullptr
1436 // - condition true => branch to true_target
1437 // (3) true_target != nullptr && false_target != nullptr
1438 // - condition true => branch to true_target
1439 // - branch to false_target
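  // E.g. (illustrative) for a materialized boolean condition with only a false_target
  // (pattern 1), the emitted sequence is roughly:
  //   testl cond_reg, cond_reg
  //   je    false_target          // branch on the opposite of "non-zero"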
1440 if (IsBooleanValueOrMaterializedCondition(cond)) {
1441 if (AreEflagsSetFrom(cond, instruction)) {
1442 if (true_target == nullptr) {
1443 __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
1444 } else {
1445 __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
1446 }
1447 } else {
1448 // Materialized condition, compare against 0.
1449 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1450 if (lhs.IsRegister()) {
1451 __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
1452 } else {
1453 __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
1454 }
1455 if (true_target == nullptr) {
1456 __ j(kEqual, false_target);
1457 } else {
1458 __ j(kNotEqual, true_target);
1459 }
1460 }
1461 } else {
1462 // Condition has not been materialized, use its inputs as the
1463 // comparison and its condition as the branch condition.
1464 HCondition* condition = cond->AsCondition();
1465
1466 // If this is a long or FP comparison that has been folded into
1467 // the HCondition, generate the comparison directly.
1468 Primitive::Type type = condition->InputAt(0)->GetType();
1469 if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
1470 GenerateCompareTestAndBranch(condition, true_target, false_target);
1471 return;
1472 }
1473
1474 Location lhs = condition->GetLocations()->InAt(0);
1475 Location rhs = condition->GetLocations()->InAt(1);
1476 if (rhs.IsRegister()) {
1477 __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
1478 } else if (rhs.IsConstant()) {
1479 int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
1480 codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
1481 } else {
1482 __ cmpl(lhs.AsRegister<CpuRegister>(),
1483 Address(CpuRegister(RSP), rhs.GetStackIndex()));
1484 }
1485 if (true_target == nullptr) {
1486 __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
1487 } else {
1488 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1489 }
1490 }
1491
1492 // If neither branch falls through (case 3), the conditional branch to `true_target`
1493 // was already emitted (case 2) and we need to emit a jump to `false_target`.
1494 if (true_target != nullptr && false_target != nullptr) {
1495 __ jmp(false_target);
1496 }
1497 }
1498
1499 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
1500 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
1501 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1502 locations->SetInAt(0, Location::Any());
1503 }
1504 }
1505
1506 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
1507 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1508 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1509 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1510 nullptr : codegen_->GetLabelOf(true_successor);
1511 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1512 nullptr : codegen_->GetLabelOf(false_successor);
1513 GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
1514 }
1515
1516 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1517 LocationSummary* locations = new (GetGraph()->GetArena())
1518 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1519 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1520 locations->SetInAt(0, Location::Any());
1521 }
1522 }
1523
1524 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1525 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
1526 GenerateTestAndBranch<Label>(deoptimize,
1527 /* condition_input_index */ 0,
1528 slow_path->GetEntryLabel(),
1529 /* false_target */ nullptr);
1530 }
1531
1532 static bool SelectCanUseCMOV(HSelect* select) {
1533 // There are no conditional move instructions for XMMs.
1534 if (Primitive::IsFloatingPointType(select->GetType())) {
1535 return false;
1536 }
1537
1538 // An FP condition doesn't generate the single condition code that we need.
1539 HInstruction* condition = select->GetCondition();
1540 if (condition->IsCondition() &&
1541 Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())) {
1542 return false;
1543 }
1544
1545 // We can generate a CMOV for this Select.
1546 return true;
1547 }
1548
1549 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
1550 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
1551 if (Primitive::IsFloatingPointType(select->GetType())) {
1552 locations->SetInAt(0, Location::RequiresFpuRegister());
1553 locations->SetInAt(1, Location::Any());
1554 } else {
1555 locations->SetInAt(0, Location::RequiresRegister());
1556 if (SelectCanUseCMOV(select)) {
1557 if (select->InputAt(1)->IsConstant()) {
1558 locations->SetInAt(1, Location::RequiresRegister());
1559 } else {
1560 locations->SetInAt(1, Location::Any());
1561 }
1562 } else {
1563 locations->SetInAt(1, Location::Any());
1564 }
1565 }
1566 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1567 locations->SetInAt(2, Location::RequiresRegister());
1568 }
1569 locations->SetOut(Location::SameAsFirstInput());
1570 }
1571
1572 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
1573 LocationSummary* locations = select->GetLocations();
1574 if (SelectCanUseCMOV(select)) {
1575 // If both the condition and the source types are integer, we can generate
1576 // a CMOV to implement Select.
1577 CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
1578 Location value_true_loc = locations->InAt(1);
1579 DCHECK(locations->InAt(0).Equals(locations->Out()));
1580
1581 HInstruction* select_condition = select->GetCondition();
1582 Condition cond = kNotEqual;
1583
1584 // Figure out how to test the 'condition'.
1585 if (select_condition->IsCondition()) {
1586 HCondition* condition = select_condition->AsCondition();
1587 if (!condition->IsEmittedAtUseSite()) {
1588 // This was a previously materialized condition.
1589 // Can we use the existing condition code?
1590 if (AreEflagsSetFrom(condition, select)) {
1591 // Materialization was the previous instruction. Condition codes are right.
1592 cond = X86_64IntegerCondition(condition->GetCondition());
1593 } else {
1594 // No, we have to recreate the condition code.
1595 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1596 __ testl(cond_reg, cond_reg);
1597 }
1598 } else {
1599 GenerateCompareTest(condition);
1600 cond = X86_64IntegerCondition(condition->GetCondition());
1601 }
1602 } else {
1603 // Must be a boolean condition, which needs to be compared to 0.
1604 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1605 __ testl(cond_reg, cond_reg);
1606 }
1607
1608 // If the condition is true, overwrite the output, which already contains false.
1609 // Generate the correct sized CMOV.
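    // E.g. (illustrative) for a 32-bit Select with a freshly tested condition
    // register, the emitted sequence is roughly:
    //   testl  cond_reg, cond_reg
    //   cmovne value_false, value_true   // copy the true value into out when cond != 0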
1610 bool is_64_bit = Primitive::Is64BitType(select->GetType());
1611 if (value_true_loc.IsRegister()) {
1612 __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
1613 } else {
1614 __ cmov(cond,
1615 value_false,
1616 Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
1617 }
1618 } else {
1619 NearLabel false_target;
1620 GenerateTestAndBranch<NearLabel>(select,
1621 /* condition_input_index */ 2,
1622 /* true_target */ nullptr,
1623 &false_target);
1624 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1625 __ Bind(&false_target);
1626 }
1627 }
1628
1629 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
1630 new (GetGraph()->GetArena()) LocationSummary(info);
1631 }
1632
1633 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
1634 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
1635 }
1636
1637 void CodeGeneratorX86_64::GenerateNop() {
1638 __ nop();
1639 }
1640
1641 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
1642 LocationSummary* locations =
1643 new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
1644 // Handle the long/FP comparisons made in instruction simplification.
1645 switch (cond->InputAt(0)->GetType()) {
1646 case Primitive::kPrimLong:
1647 locations->SetInAt(0, Location::RequiresRegister());
1648 locations->SetInAt(1, Location::Any());
1649 break;
1650 case Primitive::kPrimFloat:
1651 case Primitive::kPrimDouble:
1652 locations->SetInAt(0, Location::RequiresFpuRegister());
1653 locations->SetInAt(1, Location::Any());
1654 break;
1655 default:
1656 locations->SetInAt(0, Location::RequiresRegister());
1657 locations->SetInAt(1, Location::Any());
1658 break;
1659 }
1660 if (!cond->IsEmittedAtUseSite()) {
1661 locations->SetOut(Location::RequiresRegister());
1662 }
1663 }
1664
1665 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
1666 if (cond->IsEmittedAtUseSite()) {
1667 return;
1668 }
1669
1670 LocationSummary* locations = cond->GetLocations();
1671 Location lhs = locations->InAt(0);
1672 Location rhs = locations->InAt(1);
1673 CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
1674 NearLabel true_label, false_label;
1675
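  // Integer and long conditions are materialized directly with setcc; floating-point
  // conditions fall through to the jump-based materialization after the switch, which
  // writes 0 or 1 into `reg` via the false/true labels.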
1676 switch (cond->InputAt(0)->GetType()) {
1677 default:
1678 // Integer case.
1679
1680 // Clear output register: setcc only sets the low byte.
1681 __ xorl(reg, reg);
1682
1683 if (rhs.IsRegister()) {
1684 __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
1685 } else if (rhs.IsConstant()) {
1686 int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
1687 codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
1688 } else {
1689 __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
1690 }
1691 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1692 return;
1693 case Primitive::kPrimLong:
1694 // Clear output register: setcc only sets the low byte.
1695 __ xorl(reg, reg);
1696
1697 if (rhs.IsRegister()) {
1698 __ cmpq(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
1699 } else if (rhs.IsConstant()) {
1700 int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
1701 codegen_->Compare64BitValue(lhs.AsRegister<CpuRegister>(), value);
1702 } else {
1703 __ cmpq(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
1704 }
1705 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1706 return;
1707 case Primitive::kPrimFloat: {
1708 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1709 if (rhs.IsConstant()) {
1710 float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
1711 __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
1712 } else if (rhs.IsStackSlot()) {
1713 __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
1714 } else {
1715 __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
1716 }
1717 GenerateFPJumps(cond, &true_label, &false_label);
1718 break;
1719 }
1720 case Primitive::kPrimDouble: {
1721 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1722 if (rhs.IsConstant()) {
1723 double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
1724 __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
1725 } else if (rhs.IsDoubleStackSlot()) {
1726 __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
1727 } else {
1728 __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
1729 }
1730 GenerateFPJumps(cond, &true_label, &false_label);
1731 break;
1732 }
1733 }
1734
1735 // Convert the jumps into the result.
1736 NearLabel done_label;
1737
1738 // False case: result = 0.
1739 __ Bind(&false_label);
1740 __ xorl(reg, reg);
1741 __ jmp(&done_label);
1742
1743 // True case: result = 1.
1744 __ Bind(&true_label);
1745 __ movl(reg, Immediate(1));
1746 __ Bind(&done_label);
1747 }
1748
1749 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
1750 HandleCondition(comp);
1751 }
1752
1753 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
1754 HandleCondition(comp);
1755 }
1756
1757 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
1758 HandleCondition(comp);
1759 }
1760
1761 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
1762 HandleCondition(comp);
1763 }
1764
1765 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
1766 HandleCondition(comp);
1767 }
1768
1769 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
1770 HandleCondition(comp);
1771 }
1772
1773 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1774 HandleCondition(comp);
1775 }
1776
1777 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1778 HandleCondition(comp);
1779 }
1780
1781 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
1782 HandleCondition(comp);
1783 }
1784
1785 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
1786 HandleCondition(comp);
1787 }
1788
1789 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
1790 HandleCondition(comp);
1791 }
1792
1793 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
1794 HandleCondition(comp);
1795 }
1796
1797 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
1798 HandleCondition(comp);
1799 }
1800
1801 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
1802 HandleCondition(comp);
1803 }
1804
1805 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
1806 HandleCondition(comp);
1807 }
1808
1809 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
1810 HandleCondition(comp);
1811 }
1812
1813 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
1814 HandleCondition(comp);
1815 }
1816
1817 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
1818 HandleCondition(comp);
1819 }
1820
1821 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
1822 HandleCondition(comp);
1823 }
1824
1825 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
1826 HandleCondition(comp);
1827 }
1828
1829 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
1830 LocationSummary* locations =
1831 new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
1832 switch (compare->InputAt(0)->GetType()) {
1833 case Primitive::kPrimBoolean:
1834 case Primitive::kPrimByte:
1835 case Primitive::kPrimShort:
1836 case Primitive::kPrimChar:
1837 case Primitive::kPrimInt:
1838 case Primitive::kPrimLong: {
1839 locations->SetInAt(0, Location::RequiresRegister());
1840 locations->SetInAt(1, Location::Any());
1841 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1842 break;
1843 }
1844 case Primitive::kPrimFloat:
1845 case Primitive::kPrimDouble: {
1846 locations->SetInAt(0, Location::RequiresFpuRegister());
1847 locations->SetInAt(1, Location::Any());
1848 locations->SetOut(Location::RequiresRegister());
1849 break;
1850 }
1851 default:
1852 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
1853 }
1854 }
1855
1856 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
1857 LocationSummary* locations = compare->GetLocations();
1858 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1859 Location left = locations->InAt(0);
1860 Location right = locations->InAt(1);
1861
1862 NearLabel less, greater, done;
1863 Primitive::Type type = compare->InputAt(0)->GetType();
1864 Condition less_cond = kLess;
1865
1866 switch (type) {
1867 case Primitive::kPrimBoolean:
1868 case Primitive::kPrimByte:
1869 case Primitive::kPrimShort:
1870 case Primitive::kPrimChar:
1871 case Primitive::kPrimInt: {
1872 CpuRegister left_reg = left.AsRegister<CpuRegister>();
1873 if (right.IsConstant()) {
1874 int32_t value = right.GetConstant()->AsIntConstant()->GetValue();
1875 codegen_->Compare32BitValue(left_reg, value);
1876 } else if (right.IsStackSlot()) {
1877 __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1878 } else {
1879 __ cmpl(left_reg, right.AsRegister<CpuRegister>());
1880 }
1881 break;
1882 }
1883 case Primitive::kPrimLong: {
1884 CpuRegister left_reg = left.AsRegister<CpuRegister>();
1885 if (right.IsConstant()) {
1886 int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1887 codegen_->Compare64BitValue(left_reg, value);
1888 } else if (right.IsDoubleStackSlot()) {
1889 __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1890 } else {
1891 __ cmpq(left_reg, right.AsRegister<CpuRegister>());
1892 }
1893 break;
1894 }
1895 case Primitive::kPrimFloat: {
1896 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
1897 if (right.IsConstant()) {
1898 float value = right.GetConstant()->AsFloatConstant()->GetValue();
1899 __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
1900 } else if (right.IsStackSlot()) {
1901 __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1902 } else {
1903 __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
1904 }
1905 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
1906 less_cond = kBelow; // ucomis{s,d} sets CF
1907 break;
1908 }
1909 case Primitive::kPrimDouble: {
1910 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
1911 if (right.IsConstant()) {
1912 double value = right.GetConstant()->AsDoubleConstant()->GetValue();
1913 __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
1914 } else if (right.IsDoubleStackSlot()) {
1915 __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1916 } else {
1917 __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
1918 }
1919 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
1920 less_cond = kBelow; // ucomis{s,d} sets CF
1921 break;
1922 }
1923 default:
1924 LOG(FATAL) << "Unexpected compare type " << type;
1925 }
1926
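  // Materialize the HCompare result: 0 if equal, +1 for "greater", -1 for "less".
  // The unordered (NaN) case was already redirected above according to the gt/lt bias,
  // and `less_cond` is kBelow for FP comparisons because ucomis{s,d} sets CF on "less".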
1927 __ movl(out, Immediate(0));
1928 __ j(kEqual, &done);
1929 __ j(less_cond, &less);
1930
1931 __ Bind(&greater);
1932 __ movl(out, Immediate(1));
1933 __ jmp(&done);
1934
1935 __ Bind(&less);
1936 __ movl(out, Immediate(-1));
1937
1938 __ Bind(&done);
1939 }
1940
1941 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
1942 LocationSummary* locations =
1943 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1944 locations->SetOut(Location::ConstantLocation(constant));
1945 }
1946
1947 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
1948 // Will be generated at use site.
1949 }
1950
1951 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
1952 LocationSummary* locations =
1953 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1954 locations->SetOut(Location::ConstantLocation(constant));
1955 }
1956
1957 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
1958 // Will be generated at use site.
1959 }
1960
1961 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
1962 LocationSummary* locations =
1963 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1964 locations->SetOut(Location::ConstantLocation(constant));
1965 }
1966
1967 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
1968 // Will be generated at use site.
1969 }
1970
1971 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
1972 LocationSummary* locations =
1973 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1974 locations->SetOut(Location::ConstantLocation(constant));
1975 }
1976
1977 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
1978 // Will be generated at use site.
1979 }
1980
1981 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
1982 LocationSummary* locations =
1983 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1984 locations->SetOut(Location::ConstantLocation(constant));
1985 }
1986
1987 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
1988 HDoubleConstant* constant ATTRIBUTE_UNUSED) {
1989 // Will be generated at use site.
1990 }
1991
1992 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
1993 memory_barrier->SetLocations(nullptr);
1994 }
1995
1996 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
1997 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
1998 }
1999
2000 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2001 ret->SetLocations(nullptr);
2002 }
2003
2004 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2005 codegen_->GenerateFrameExit();
2006 }
2007
2008 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2009 LocationSummary* locations =
2010 new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall);
2011 switch (ret->InputAt(0)->GetType()) {
2012 case Primitive::kPrimBoolean:
2013 case Primitive::kPrimByte:
2014 case Primitive::kPrimChar:
2015 case Primitive::kPrimShort:
2016 case Primitive::kPrimInt:
2017 case Primitive::kPrimNot:
2018 case Primitive::kPrimLong:
2019 locations->SetInAt(0, Location::RegisterLocation(RAX));
2020 break;
2021
2022 case Primitive::kPrimFloat:
2023 case Primitive::kPrimDouble:
2024 locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
2025 break;
2026
2027 default:
2028 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2029 }
2030 }
2031
2032 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2033 if (kIsDebugBuild) {
2034 switch (ret->InputAt(0)->GetType()) {
2035 case Primitive::kPrimBoolean:
2036 case Primitive::kPrimByte:
2037 case Primitive::kPrimChar:
2038 case Primitive::kPrimShort:
2039 case Primitive::kPrimInt:
2040 case Primitive::kPrimNot:
2041 case Primitive::kPrimLong:
2042 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2043 break;
2044
2045 case Primitive::kPrimFloat:
2046 case Primitive::kPrimDouble:
2047 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2048 XMM0);
2049 break;
2050
2051 default:
2052 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2053 }
2054 }
2055 codegen_->GenerateFrameExit();
2056 }
2057
2058 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(Primitive::Type type) const {
2059 switch (type) {
2060 case Primitive::kPrimBoolean:
2061 case Primitive::kPrimByte:
2062 case Primitive::kPrimChar:
2063 case Primitive::kPrimShort:
2064 case Primitive::kPrimInt:
2065 case Primitive::kPrimNot:
2066 case Primitive::kPrimLong:
2067 return Location::RegisterLocation(RAX);
2068
2069 case Primitive::kPrimVoid:
2070 return Location::NoLocation();
2071
2072 case Primitive::kPrimDouble:
2073 case Primitive::kPrimFloat:
2074 return Location::FpuRegisterLocation(XMM0);
2075 }
2076
2077 UNREACHABLE();
2078 }
2079
2080 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2081 return Location::RegisterLocation(kMethodRegisterArgument);
2082 }
2083
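// Note that stack_index_ is advanced for every argument (by two slots for wide types)
// even when the argument is passed in a register, so the stack offsets computed for
// overflow arguments stay aligned with the dex-level argument slots.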
2084 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Type type) {
2085 switch (type) {
2086 case Primitive::kPrimBoolean:
2087 case Primitive::kPrimByte:
2088 case Primitive::kPrimChar:
2089 case Primitive::kPrimShort:
2090 case Primitive::kPrimInt:
2091 case Primitive::kPrimNot: {
2092 uint32_t index = gp_index_++;
2093 stack_index_++;
2094 if (index < calling_convention.GetNumberOfRegisters()) {
2095 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2096 } else {
2097 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2098 }
2099 }
2100
2101 case Primitive::kPrimLong: {
2102 uint32_t index = gp_index_;
2103 stack_index_ += 2;
2104 if (index < calling_convention.GetNumberOfRegisters()) {
2105 gp_index_ += 1;
2106 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2107 } else {
2108 gp_index_ += 2;
2109 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2110 }
2111 }
2112
2113 case Primitive::kPrimFloat: {
2114 uint32_t index = float_index_++;
2115 stack_index_++;
2116 if (index < calling_convention.GetNumberOfFpuRegisters()) {
2117 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2118 } else {
2119 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2120 }
2121 }
2122
2123 case Primitive::kPrimDouble: {
2124 uint32_t index = float_index_++;
2125 stack_index_ += 2;
2126 if (index < calling_convention.GetNumberOfFpuRegisters()) {
2127 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2128 } else {
2129 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2130 }
2131 }
2132
2133 case Primitive::kPrimVoid:
2134 LOG(FATAL) << "Unexpected parameter type " << type;
2135 break;
2136 }
2137 return Location::NoLocation();
2138 }
2139
2140 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2141 // The trampoline uses the same calling convention as the dex calling convention,
2142 // except that instead of loading the target Method* into the first argument
2143 // register, that register will contain the method_idx.
2144 HandleInvoke(invoke);
2145 }
2146
2147 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2148 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2149 }
2150
2151 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2152 // Explicit clinit checks triggered by static invokes must have been pruned by
2153 // art::PrepareForRegisterAllocation.
2154 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2155
2156 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2157 if (intrinsic.TryDispatch(invoke)) {
2158 return;
2159 }
2160
2161 HandleInvoke(invoke);
2162 }
2163
2164 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2165 if (invoke->GetLocations()->Intrinsified()) {
2166 IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
2167 intrinsic.Dispatch(invoke);
2168 return true;
2169 }
2170 return false;
2171 }
2172
2173 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2174 // Explicit clinit checks triggered by static invokes must have been pruned by
2175 // art::PrepareForRegisterAllocation.
2176 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2177
2178 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2179 return;
2180 }
2181
2182 LocationSummary* locations = invoke->GetLocations();
2183 codegen_->GenerateStaticOrDirectCall(
2184 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2185 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2186 }
2187
2188 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
2189 InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
2190 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2191 }
2192
2193 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2194 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2195 if (intrinsic.TryDispatch(invoke)) {
2196 return;
2197 }
2198
2199 HandleInvoke(invoke);
2200 }
2201
2202 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2203 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2204 return;
2205 }
2206
2207 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2208 DCHECK(!codegen_->IsLeafMethod());
2209 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2210 }
2211
2212 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2213 HandleInvoke(invoke);
2214 // Add the hidden argument.
2215 invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
2216 }
2217
2218 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2219 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2220 LocationSummary* locations = invoke->GetLocations();
2221 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2222 CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
2223 uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
2224 invoke->GetImtIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
2225 Location receiver = locations->InAt(0);
2226 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
2227
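  // Interface dispatch: load the receiver's class, index into its embedded IMT at
  // method_offset, and call the resolved ArtMethod's quick entry point; RAX carries
  // the interface method's dex method index as the hidden argument.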
2228 // Set the hidden argument. This is safe to do here, as RAX
2229 // won't be modified thereafter, before the `call` instruction.
2230 DCHECK_EQ(RAX, hidden_reg.AsRegister());
2231 codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
2232
2233 if (receiver.IsStackSlot()) {
2234 __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
2235 // /* HeapReference<Class> */ temp = temp->klass_
2236 __ movl(temp, Address(temp, class_offset));
2237 } else {
2238 // /* HeapReference<Class> */ temp = receiver->klass_
2239 __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
2240 }
2241 codegen_->MaybeRecordImplicitNullCheck(invoke);
2242 // Instead of simply (possibly) unpoisoning `temp` here, we should
2243 // emit a read barrier for the previous class reference load.
2244 // However this is not required in practice, as this is an
2245 // intermediate/temporary reference and because the current
2246 // concurrent copying collector keeps the from-space memory
2247 // intact/accessible until the end of the marking phase (though
2248 // this may no longer hold in the future).
2249 __ MaybeUnpoisonHeapReference(temp);
2250 // temp = temp->GetImtEntryAt(method_offset);
2251 __ movq(temp, Address(temp, method_offset));
2252 // call temp->GetEntryPoint();
2253 __ call(Address(temp,
2254 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize).SizeValue()));
2255
2256 DCHECK(!codegen_->IsLeafMethod());
2257 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2258 }
2259
2260 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
2261 LocationSummary* locations =
2262 new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
2263 switch (neg->GetResultType()) {
2264 case Primitive::kPrimInt:
2265 case Primitive::kPrimLong:
2266 locations->SetInAt(0, Location::RequiresRegister());
2267 locations->SetOut(Location::SameAsFirstInput());
2268 break;
2269
2270 case Primitive::kPrimFloat:
2271 case Primitive::kPrimDouble:
2272 locations->SetInAt(0, Location::RequiresFpuRegister());
2273 locations->SetOut(Location::SameAsFirstInput());
2274 locations->AddTemp(Location::RequiresFpuRegister());
2275 break;
2276
2277 default:
2278 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2279 }
2280 }
2281
2282 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
2283 LocationSummary* locations = neg->GetLocations();
2284 Location out = locations->Out();
2285 Location in = locations->InAt(0);
2286 switch (neg->GetResultType()) {
2287 case Primitive::kPrimInt:
2288 DCHECK(in.IsRegister());
2289 DCHECK(in.Equals(out));
2290 __ negl(out.AsRegister<CpuRegister>());
2291 break;
2292
2293 case Primitive::kPrimLong:
2294 DCHECK(in.IsRegister());
2295 DCHECK(in.Equals(out));
2296 __ negq(out.AsRegister<CpuRegister>());
2297 break;
2298
2299 case Primitive::kPrimFloat: {
2300 DCHECK(in.Equals(out));
2301 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2302 // Implement float negation with an exclusive or with value
2303 // 0x80000000 (mask for bit 31, representing the sign of a
2304 // single-precision floating-point number).
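      // E.g. negating 1.0f: 0x3F800000 ^ 0x80000000 = 0xBF800000, i.e. -1.0f.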
2305 __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
2306 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2307 break;
2308 }
2309
2310 case Primitive::kPrimDouble: {
2311 DCHECK(in.Equals(out));
2312 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2313 // Implement double negation with an exclusive or with value
2314 // 0x8000000000000000 (mask for bit 63, representing the sign of
2315 // a double-precision floating-point number).
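      // E.g. negating 1.0: 0x3FF0000000000000 ^ 0x8000000000000000 = 0xBFF0000000000000, i.e. -1.0.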
2316 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
2317 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2318 break;
2319 }
2320
2321 default:
2322 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2323 }
2324 }
2325
2326 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2327 LocationSummary* locations =
2328 new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
2329 Primitive::Type result_type = conversion->GetResultType();
2330 Primitive::Type input_type = conversion->GetInputType();
2331 DCHECK_NE(result_type, input_type);
2332
2333 // The Java language does not allow treating boolean as an integral type but
2334 // our bit representation makes it safe.
2335
2336 switch (result_type) {
2337 case Primitive::kPrimByte:
2338 switch (input_type) {
2339 case Primitive::kPrimLong:
2340 // Type conversion from long to byte is a result of code transformations.
2341 case Primitive::kPrimBoolean:
2342 // Boolean input is a result of code transformations.
2343 case Primitive::kPrimShort:
2344 case Primitive::kPrimInt:
2345 case Primitive::kPrimChar:
2346 // Processing a Dex `int-to-byte' instruction.
2347 locations->SetInAt(0, Location::Any());
2348 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2349 break;
2350
2351 default:
2352 LOG(FATAL) << "Unexpected type conversion from " << input_type
2353 << " to " << result_type;
2354 }
2355 break;
2356
2357 case Primitive::kPrimShort:
2358 switch (input_type) {
2359 case Primitive::kPrimLong:
2360 // Type conversion from long to short is a result of code transformations.
2361 case Primitive::kPrimBoolean:
2362 // Boolean input is a result of code transformations.
2363 case Primitive::kPrimByte:
2364 case Primitive::kPrimInt:
2365 case Primitive::kPrimChar:
2366 // Processing a Dex `int-to-short' instruction.
2367 locations->SetInAt(0, Location::Any());
2368 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2369 break;
2370
2371 default:
2372 LOG(FATAL) << "Unexpected type conversion from " << input_type
2373 << " to " << result_type;
2374 }
2375 break;
2376
2377 case Primitive::kPrimInt:
2378 switch (input_type) {
2379 case Primitive::kPrimLong:
2380 // Processing a Dex `long-to-int' instruction.
2381 locations->SetInAt(0, Location::Any());
2382 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2383 break;
2384
2385 case Primitive::kPrimFloat:
2386 // Processing a Dex `float-to-int' instruction.
2387 locations->SetInAt(0, Location::RequiresFpuRegister());
2388 locations->SetOut(Location::RequiresRegister());
2389 break;
2390
2391 case Primitive::kPrimDouble:
2392 // Processing a Dex `double-to-int' instruction.
2393 locations->SetInAt(0, Location::RequiresFpuRegister());
2394 locations->SetOut(Location::RequiresRegister());
2395 break;
2396
2397 default:
2398 LOG(FATAL) << "Unexpected type conversion from " << input_type
2399 << " to " << result_type;
2400 }
2401 break;
2402
2403 case Primitive::kPrimLong:
2404 switch (input_type) {
2405 case Primitive::kPrimBoolean:
2406 // Boolean input is a result of code transformations.
2407 case Primitive::kPrimByte:
2408 case Primitive::kPrimShort:
2409 case Primitive::kPrimInt:
2410 case Primitive::kPrimChar:
2411 // Processing a Dex `int-to-long' instruction.
2412 // TODO: We would benefit from a (to-be-implemented)
2413 // Location::RegisterOrStackSlot requirement for this input.
2414 locations->SetInAt(0, Location::RequiresRegister());
2415 locations->SetOut(Location::RequiresRegister());
2416 break;
2417
2418 case Primitive::kPrimFloat:
2419 // Processing a Dex `float-to-long' instruction.
2420 locations->SetInAt(0, Location::RequiresFpuRegister());
2421 locations->SetOut(Location::RequiresRegister());
2422 break;
2423
2424 case Primitive::kPrimDouble:
2425 // Processing a Dex `double-to-long' instruction.
2426 locations->SetInAt(0, Location::RequiresFpuRegister());
2427 locations->SetOut(Location::RequiresRegister());
2428 break;
2429
2430 default:
2431 LOG(FATAL) << "Unexpected type conversion from " << input_type
2432 << " to " << result_type;
2433 }
2434 break;
2435
2436 case Primitive::kPrimChar:
2437 switch (input_type) {
2438 case Primitive::kPrimLong:
2439 // Type conversion from long to char is a result of code transformations.
2440 case Primitive::kPrimBoolean:
2441 // Boolean input is a result of code transformations.
2442 case Primitive::kPrimByte:
2443 case Primitive::kPrimShort:
2444 case Primitive::kPrimInt:
2445 // Processing a Dex `int-to-char' instruction.
2446 locations->SetInAt(0, Location::Any());
2447 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2448 break;
2449
2450 default:
2451 LOG(FATAL) << "Unexpected type conversion from " << input_type
2452 << " to " << result_type;
2453 }
2454 break;
2455
2456 case Primitive::kPrimFloat:
2457 switch (input_type) {
2458 case Primitive::kPrimBoolean:
2459 // Boolean input is a result of code transformations.
2460 case Primitive::kPrimByte:
2461 case Primitive::kPrimShort:
2462 case Primitive::kPrimInt:
2463 case Primitive::kPrimChar:
2464 // Processing a Dex `int-to-float' instruction.
2465 locations->SetInAt(0, Location::Any());
2466 locations->SetOut(Location::RequiresFpuRegister());
2467 break;
2468
2469 case Primitive::kPrimLong:
2470 // Processing a Dex `long-to-float' instruction.
2471 locations->SetInAt(0, Location::Any());
2472 locations->SetOut(Location::RequiresFpuRegister());
2473 break;
2474
2475 case Primitive::kPrimDouble:
2476 // Processing a Dex `double-to-float' instruction.
2477 locations->SetInAt(0, Location::Any());
2478 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2479 break;
2480
2481 default:
2482 LOG(FATAL) << "Unexpected type conversion from " << input_type
2483 << " to " << result_type;
2484 }
2485 break;
2486
2487 case Primitive::kPrimDouble:
2488 switch (input_type) {
2489 case Primitive::kPrimBoolean:
2490 // Boolean input is a result of code transformations.
2491 case Primitive::kPrimByte:
2492 case Primitive::kPrimShort:
2493 case Primitive::kPrimInt:
2494 case Primitive::kPrimChar:
2495 // Processing a Dex `int-to-double' instruction.
2496 locations->SetInAt(0, Location::Any());
2497 locations->SetOut(Location::RequiresFpuRegister());
2498 break;
2499
2500 case Primitive::kPrimLong:
2501 // Processing a Dex `long-to-double' instruction.
2502 locations->SetInAt(0, Location::Any());
2503 locations->SetOut(Location::RequiresFpuRegister());
2504 break;
2505
2506 case Primitive::kPrimFloat:
2507 // Processing a Dex `float-to-double' instruction.
2508 locations->SetInAt(0, Location::Any());
2509 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2510 break;
2511
2512 default:
2513 LOG(FATAL) << "Unexpected type conversion from " << input_type
2514 << " to " << result_type;
2515 }
2516 break;
2517
2518 default:
2519 LOG(FATAL) << "Unexpected type conversion from " << input_type
2520 << " to " << result_type;
2521 }
2522 }
2523
2524 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2525 LocationSummary* locations = conversion->GetLocations();
2526 Location out = locations->Out();
2527 Location in = locations->InAt(0);
2528 Primitive::Type result_type = conversion->GetResultType();
2529 Primitive::Type input_type = conversion->GetInputType();
2530 DCHECK_NE(result_type, input_type);
2531 switch (result_type) {
2532 case Primitive::kPrimByte:
2533 switch (input_type) {
2534 case Primitive::kPrimLong:
2535 // Type conversion from long to byte is a result of code transformations.
2536 case Primitive::kPrimBoolean:
2537 // Boolean input is a result of code transformations.
2538 case Primitive::kPrimShort:
2539 case Primitive::kPrimInt:
2540 case Primitive::kPrimChar:
2541 // Processing a Dex `int-to-byte' instruction.
2542 if (in.IsRegister()) {
2543 __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2544 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2545 __ movsxb(out.AsRegister<CpuRegister>(),
2546 Address(CpuRegister(RSP), in.GetStackIndex()));
2547 } else {
2548 __ movl(out.AsRegister<CpuRegister>(),
2549 Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
2550 }
2551 break;
2552
2553 default:
2554 LOG(FATAL) << "Unexpected type conversion from " << input_type
2555 << " to " << result_type;
2556 }
2557 break;
2558
2559 case Primitive::kPrimShort:
2560 switch (input_type) {
2561 case Primitive::kPrimLong:
2562 // Type conversion from long to short is a result of code transformations.
2563 case Primitive::kPrimBoolean:
2564 // Boolean input is a result of code transformations.
2565 case Primitive::kPrimByte:
2566 case Primitive::kPrimInt:
2567 case Primitive::kPrimChar:
2568 // Processing a Dex `int-to-short' instruction.
2569 if (in.IsRegister()) {
2570 __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2571 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2572 __ movsxw(out.AsRegister<CpuRegister>(),
2573 Address(CpuRegister(RSP), in.GetStackIndex()));
2574 } else {
2575 __ movl(out.AsRegister<CpuRegister>(),
2576 Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
2577 }
2578 break;
2579
2580 default:
2581 LOG(FATAL) << "Unexpected type conversion from " << input_type
2582 << " to " << result_type;
2583 }
2584 break;
2585
2586 case Primitive::kPrimInt:
2587 switch (input_type) {
2588 case Primitive::kPrimLong:
2589 // Processing a Dex `long-to-int' instruction.
2590 if (in.IsRegister()) {
2591 __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2592 } else if (in.IsDoubleStackSlot()) {
2593 __ movl(out.AsRegister<CpuRegister>(),
2594 Address(CpuRegister(RSP), in.GetStackIndex()));
2595 } else {
2596 DCHECK(in.IsConstant());
2597 DCHECK(in.GetConstant()->IsLongConstant());
2598 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2599 __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
2600 }
2601 break;
2602
2603 case Primitive::kPrimFloat: {
2604 // Processing a Dex `float-to-int' instruction.
2605 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2606 CpuRegister output = out.AsRegister<CpuRegister>();
2607 NearLabel done, nan;
2608
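          // This follows the Java narrowing rules: NaN converts to 0 and values >= 2^31
          // saturate to Integer.MAX_VALUE. For values below Integer.MIN_VALUE, cvttss2si
          // itself produces 0x80000000 (Integer.MIN_VALUE), which is the required result.
          // The same pattern is used for the double and long conversions below.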
2609 __ movl(output, Immediate(kPrimIntMax));
2610 // if input >= (float)INT_MAX goto done
2611 __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax));
2612 __ j(kAboveEqual, &done);
2613 // if input == NaN goto nan
2614 __ j(kUnordered, &nan);
2615 // output = float-to-int-truncate(input)
2616 __ cvttss2si(output, input, false);
2617 __ jmp(&done);
2618 __ Bind(&nan);
2619 // output = 0
2620 __ xorl(output, output);
2621 __ Bind(&done);
2622 break;
2623 }
2624
2625 case Primitive::kPrimDouble: {
2626 // Processing a Dex `double-to-int' instruction.
2627 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2628 CpuRegister output = out.AsRegister<CpuRegister>();
2629 NearLabel done, nan;
2630
2631 __ movl(output, Immediate(kPrimIntMax));
2632 // if input >= (double)INT_MAX goto done
2633 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
2634 __ j(kAboveEqual, &done);
2635 // if input == NaN goto nan
2636 __ j(kUnordered, &nan);
2637 // output = double-to-int-truncate(input)
2638 __ cvttsd2si(output, input);
2639 __ jmp(&done);
2640 __ Bind(&nan);
2641 // output = 0
2642 __ xorl(output, output);
2643 __ Bind(&done);
2644 break;
2645 }
2646
2647 default:
2648 LOG(FATAL) << "Unexpected type conversion from " << input_type
2649 << " to " << result_type;
2650 }
2651 break;
2652
2653 case Primitive::kPrimLong:
2654 DCHECK(out.IsRegister());
2655 switch (input_type) {
2656 case Primitive::kPrimBoolean:
2657 // Boolean input is a result of code transformations.
2658 case Primitive::kPrimByte:
2659 case Primitive::kPrimShort:
2660 case Primitive::kPrimInt:
2661 case Primitive::kPrimChar:
2662 // Processing a Dex `int-to-long' instruction.
2663 DCHECK(in.IsRegister());
2664 __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2665 break;
2666
2667 case Primitive::kPrimFloat: {
2668 // Processing a Dex `float-to-long' instruction.
2669 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2670 CpuRegister output = out.AsRegister<CpuRegister>();
2671 NearLabel done, nan;
2672
2673 codegen_->Load64BitValue(output, kPrimLongMax);
2674 // if input >= (float)LONG_MAX goto done
2675 __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax));
2676 __ j(kAboveEqual, &done);
2677 // if input == NaN goto nan
2678 __ j(kUnordered, &nan);
2679 // output = float-to-long-truncate(input)
2680 __ cvttss2si(output, input, true);
2681 __ jmp(&done);
2682 __ Bind(&nan);
2683 // output = 0
2684 __ xorl(output, output);
2685 __ Bind(&done);
2686 break;
2687 }
2688
2689 case Primitive::kPrimDouble: {
2690 // Processing a Dex `double-to-long' instruction.
2691 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2692 CpuRegister output = out.AsRegister<CpuRegister>();
2693 NearLabel done, nan;
2694
2695 codegen_->Load64BitValue(output, kPrimLongMax);
2696 // if input >= (double)LONG_MAX goto done
2697 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax));
2698 __ j(kAboveEqual, &done);
2699 // if input == NaN goto nan
2700 __ j(kUnordered, &nan);
2701 // output = double-to-long-truncate(input)
2702 __ cvttsd2si(output, input, true);
2703 __ jmp(&done);
2704 __ Bind(&nan);
2705 // output = 0
2706 __ xorl(output, output);
2707 __ Bind(&done);
2708 break;
2709 }
2710
2711 default:
2712 LOG(FATAL) << "Unexpected type conversion from " << input_type
2713 << " to " << result_type;
2714 }
2715 break;
2716
2717 case Primitive::kPrimChar:
2718 switch (input_type) {
2719 case Primitive::kPrimLong:
2720 // Type conversion from long to char is a result of code transformations.
2721 case Primitive::kPrimBoolean:
2722 // Boolean input is a result of code transformations.
2723 case Primitive::kPrimByte:
2724 case Primitive::kPrimShort:
2725 case Primitive::kPrimInt:
2726 // Processing a Dex `int-to-char' instruction.
2727 if (in.IsRegister()) {
2728 __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2729 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2730 __ movzxw(out.AsRegister<CpuRegister>(),
2731 Address(CpuRegister(RSP), in.GetStackIndex()));
2732 } else {
2733 __ movl(out.AsRegister<CpuRegister>(),
2734 Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
2735 }
2736 break;
2737
2738 default:
2739 LOG(FATAL) << "Unexpected type conversion from " << input_type
2740 << " to " << result_type;
2741 }
2742 break;
2743
2744 case Primitive::kPrimFloat:
2745 switch (input_type) {
2746 case Primitive::kPrimBoolean:
2747 // Boolean input is a result of code transformations.
2748 case Primitive::kPrimByte:
2749 case Primitive::kPrimShort:
2750 case Primitive::kPrimInt:
2751 case Primitive::kPrimChar:
2752 // Processing a Dex `int-to-float' instruction.
2753 if (in.IsRegister()) {
2754 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
2755 } else if (in.IsConstant()) {
2756 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
2757 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2758 codegen_->Load32BitValue(dest, static_cast<float>(v));
2759 } else {
2760 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
2761 Address(CpuRegister(RSP), in.GetStackIndex()), false);
2762 }
2763 break;
2764
2765 case Primitive::kPrimLong:
2766 // Processing a Dex `long-to-float' instruction.
2767 if (in.IsRegister()) {
2768 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
2769 } else if (in.IsConstant()) {
2770 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
2771 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2772 codegen_->Load32BitValue(dest, static_cast<float>(v));
2773 } else {
2774 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
2775 Address(CpuRegister(RSP), in.GetStackIndex()), true);
2776 }
2777 break;
2778
2779 case Primitive::kPrimDouble:
2780 // Processing a Dex `double-to-float' instruction.
2781 if (in.IsFpuRegister()) {
2782 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2783 } else if (in.IsConstant()) {
2784 double v = in.GetConstant()->AsDoubleConstant()->GetValue();
2785 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2786 codegen_->Load32BitValue(dest, static_cast<float>(v));
2787 } else {
2788 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
2789 Address(CpuRegister(RSP), in.GetStackIndex()));
2790 }
2791 break;
2792
2793 default:
2794 LOG(FATAL) << "Unexpected type conversion from " << input_type
2795 << " to " << result_type;
2796 }
2797 break;
2798
2799 case Primitive::kPrimDouble:
2800 switch (input_type) {
2801 case Primitive::kPrimBoolean:
2802 // Boolean input is a result of code transformations.
2803 case Primitive::kPrimByte:
2804 case Primitive::kPrimShort:
2805 case Primitive::kPrimInt:
2806 case Primitive::kPrimChar:
2807 // Processing a Dex `int-to-double' instruction.
2808 if (in.IsRegister()) {
2809 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
2810 } else if (in.IsConstant()) {
2811 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
2812 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2813 codegen_->Load64BitValue(dest, static_cast<double>(v));
2814 } else {
2815 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
2816 Address(CpuRegister(RSP), in.GetStackIndex()), false);
2817 }
2818 break;
2819
2820 case Primitive::kPrimLong:
2821 // Processing a Dex `long-to-double' instruction.
2822 if (in.IsRegister()) {
2823 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
2824 } else if (in.IsConstant()) {
2825 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
2826 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2827 codegen_->Load64BitValue(dest, static_cast<double>(v));
2828 } else {
2829 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
2830 Address(CpuRegister(RSP), in.GetStackIndex()), true);
2831 }
2832 break;
2833
2834 case Primitive::kPrimFloat:
2835 // Processing a Dex `float-to-double' instruction.
2836 if (in.IsFpuRegister()) {
2837 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2838 } else if (in.IsConstant()) {
2839 float v = in.GetConstant()->AsFloatConstant()->GetValue();
2840 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2841 codegen_->Load64BitValue(dest, static_cast<double>(v));
2842 } else {
2843 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
2844 Address(CpuRegister(RSP), in.GetStackIndex()));
2845 }
2846 break;
2847
2848 default:
2849 LOG(FATAL) << "Unexpected type conversion from " << input_type
2850 << " to " << result_type;
2851 }
2852 break;
2853
2854 default:
2855 LOG(FATAL) << "Unexpected type conversion from " << input_type
2856 << " to " << result_type;
2857 }
2858 }
2859
2860 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
2861 LocationSummary* locations =
2862 new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall);
2863 switch (add->GetResultType()) {
2864 case Primitive::kPrimInt: {
2865 locations->SetInAt(0, Location::RequiresRegister());
2866 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
2867 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2868 break;
2869 }
2870
2871 case Primitive::kPrimLong: {
2872 locations->SetInAt(0, Location::RequiresRegister());
2873 // We can use a leaq or addq if the constant can fit in an immediate.
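        // (x86-64 arithmetic immediates are sign-extended 32-bit values, so a long
        //  constant outside the int32 range has to be materialized in a register.)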
2874 locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
2875 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2876 break;
2877 }
2878
2879 case Primitive::kPrimDouble:
2880 case Primitive::kPrimFloat: {
2881 locations->SetInAt(0, Location::RequiresFpuRegister());
2882 locations->SetInAt(1, Location::Any());
2883 locations->SetOut(Location::SameAsFirstInput());
2884 break;
2885 }
2886
2887 default:
2888 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
2889 }
2890 }
2891
2892 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
2893 LocationSummary* locations = add->GetLocations();
2894 Location first = locations->InAt(0);
2895 Location second = locations->InAt(1);
2896 Location out = locations->Out();
2897
2898 switch (add->GetResultType()) {
2899 case Primitive::kPrimInt: {
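      // Use addl when the output aliases one of the inputs; otherwise use leal as a
      // non-destructive three-operand add so that neither input is clobbered.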
2900 if (second.IsRegister()) {
2901 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2902 __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
2903 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
2904 __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
2905 } else {
2906 __ leal(out.AsRegister<CpuRegister>(), Address(
2907 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
2908 }
2909 } else if (second.IsConstant()) {
2910 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2911 __ addl(out.AsRegister<CpuRegister>(),
2912 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
2913 } else {
2914 __ leal(out.AsRegister<CpuRegister>(), Address(
2915 first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
2916 }
2917 } else {
2918 DCHECK(first.Equals(locations->Out()));
2919 __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
2920 }
2921 break;
2922 }
2923
2924 case Primitive::kPrimLong: {
2925 if (second.IsRegister()) {
2926 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2927 __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
2928 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
2929 __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
2930 } else {
2931 __ leaq(out.AsRegister<CpuRegister>(), Address(
2932 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
2933 }
2934 } else {
2935 DCHECK(second.IsConstant());
2936 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
2937 int32_t int32_value = Low32Bits(value);
2938 DCHECK_EQ(int32_value, value);
2939 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2940 __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
2941 } else {
2942 __ leaq(out.AsRegister<CpuRegister>(), Address(
2943 first.AsRegister<CpuRegister>(), int32_value));
2944 }
2945 }
2946 break;
2947 }
2948
2949 case Primitive::kPrimFloat: {
2950 if (second.IsFpuRegister()) {
2951 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
2952 } else if (second.IsConstant()) {
2953 __ addss(first.AsFpuRegister<XmmRegister>(),
2954 codegen_->LiteralFloatAddress(
2955 second.GetConstant()->AsFloatConstant()->GetValue()));
2956 } else {
2957 DCHECK(second.IsStackSlot());
2958 __ addss(first.AsFpuRegister<XmmRegister>(),
2959 Address(CpuRegister(RSP), second.GetStackIndex()));
2960 }
2961 break;
2962 }
2963
2964 case Primitive::kPrimDouble: {
2965 if (second.IsFpuRegister()) {
2966 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
2967 } else if (second.IsConstant()) {
2968 __ addsd(first.AsFpuRegister<XmmRegister>(),
2969 codegen_->LiteralDoubleAddress(
2970 second.GetConstant()->AsDoubleConstant()->GetValue()));
2971 } else {
2972 DCHECK(second.IsDoubleStackSlot());
2973 __ addsd(first.AsFpuRegister<XmmRegister>(),
2974 Address(CpuRegister(RSP), second.GetStackIndex()));
2975 }
2976 break;
2977 }
2978
2979 default:
2980 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
2981 }
2982 }
2983
2984 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
2985 LocationSummary* locations =
2986 new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall);
2987 switch (sub->GetResultType()) {
2988 case Primitive::kPrimInt: {
2989 locations->SetInAt(0, Location::RequiresRegister());
2990 locations->SetInAt(1, Location::Any());
2991 locations->SetOut(Location::SameAsFirstInput());
2992 break;
2993 }
2994 case Primitive::kPrimLong: {
2995 locations->SetInAt(0, Location::RequiresRegister());
2996 locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
2997 locations->SetOut(Location::SameAsFirstInput());
2998 break;
2999 }
3000 case Primitive::kPrimFloat:
3001 case Primitive::kPrimDouble: {
3002 locations->SetInAt(0, Location::RequiresFpuRegister());
3003 locations->SetInAt(1, Location::Any());
3004 locations->SetOut(Location::SameAsFirstInput());
3005 break;
3006 }
3007 default:
3008 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3009 }
3010 }
3011
3012 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3013 LocationSummary* locations = sub->GetLocations();
3014 Location first = locations->InAt(0);
3015 Location second = locations->InAt(1);
3016 DCHECK(first.Equals(locations->Out()));
3017 switch (sub->GetResultType()) {
3018 case Primitive::kPrimInt: {
3019 if (second.IsRegister()) {
3020 __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3021 } else if (second.IsConstant()) {
3022 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3023 __ subl(first.AsRegister<CpuRegister>(), imm);
3024 } else {
3025 __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3026 }
3027 break;
3028 }
3029 case Primitive::kPrimLong: {
3030 if (second.IsConstant()) {
3031 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3032 DCHECK(IsInt<32>(value));
3033 __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3034 } else {
3035 __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3036 }
3037 break;
3038 }
3039
3040 case Primitive::kPrimFloat: {
3041 if (second.IsFpuRegister()) {
3042 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3043 } else if (second.IsConstant()) {
3044 __ subss(first.AsFpuRegister<XmmRegister>(),
3045 codegen_->LiteralFloatAddress(
3046 second.GetConstant()->AsFloatConstant()->GetValue()));
3047 } else {
3048 DCHECK(second.IsStackSlot());
3049 __ subss(first.AsFpuRegister<XmmRegister>(),
3050 Address(CpuRegister(RSP), second.GetStackIndex()));
3051 }
3052 break;
3053 }
3054
3055 case Primitive::kPrimDouble: {
3056 if (second.IsFpuRegister()) {
3057 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3058 } else if (second.IsConstant()) {
3059 __ subsd(first.AsFpuRegister<XmmRegister>(),
3060 codegen_->LiteralDoubleAddress(
3061 second.GetConstant()->AsDoubleConstant()->GetValue()));
3062 } else {
3063 DCHECK(second.IsDoubleStackSlot());
3064 __ subsd(first.AsFpuRegister<XmmRegister>(),
3065 Address(CpuRegister(RSP), second.GetStackIndex()));
3066 }
3067 break;
3068 }
3069
3070 default:
3071 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3072 }
3073 }
3074
3075 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
3076 LocationSummary* locations =
3077 new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
3078 switch (mul->GetResultType()) {
3079 case Primitive::kPrimInt: {
3080 locations->SetInAt(0, Location::RequiresRegister());
3081 locations->SetInAt(1, Location::Any());
3082 if (mul->InputAt(1)->IsIntConstant()) {
3083 // Can use 3 operand multiply.
3084 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3085 } else {
3086 locations->SetOut(Location::SameAsFirstInput());
3087 }
3088 break;
3089 }
3090 case Primitive::kPrimLong: {
3091 locations->SetInAt(0, Location::RequiresRegister());
3092 locations->SetInAt(1, Location::Any());
3093 if (mul->InputAt(1)->IsLongConstant() &&
3094 IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
3095 // Can use 3 operand multiply.
3096 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3097 } else {
3098 locations->SetOut(Location::SameAsFirstInput());
3099 }
3100 break;
3101 }
3102 case Primitive::kPrimFloat:
3103 case Primitive::kPrimDouble: {
3104 locations->SetInAt(0, Location::RequiresFpuRegister());
3105 locations->SetInAt(1, Location::Any());
3106 locations->SetOut(Location::SameAsFirstInput());
3107 break;
3108 }
3109
3110 default:
3111 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3112 }
3113 }
3114
3115 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
3116 LocationSummary* locations = mul->GetLocations();
3117 Location first = locations->InAt(0);
3118 Location second = locations->InAt(1);
3119 Location out = locations->Out();
3120 switch (mul->GetResultType()) {
3121 case Primitive::kPrimInt:
3122 // The constant may have ended up in a register, so test explicitly to avoid
3123 // problems where the output may not be the same as the first operand.
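      // The three-operand form imull/imulq dst, src, imm32 writes a destination that
      // may differ from the source, which is why the constant case did not need a
      // SameAsFirstInput output constraint in the locations builder.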
3124 if (mul->InputAt(1)->IsIntConstant()) {
3125 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3126 __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
3127 } else if (second.IsRegister()) {
3128 DCHECK(first.Equals(out));
3129 __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3130 } else {
3131 DCHECK(first.Equals(out));
3132 DCHECK(second.IsStackSlot());
3133 __ imull(first.AsRegister<CpuRegister>(),
3134 Address(CpuRegister(RSP), second.GetStackIndex()));
3135 }
3136 break;
3137 case Primitive::kPrimLong: {
3138 // The constant may have ended up in a register, so test explicitly to avoid
3139 // problems where the output may not be the same as the first operand.
3140 if (mul->InputAt(1)->IsLongConstant()) {
3141 int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
3142 if (IsInt<32>(value)) {
3143 __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
3144 Immediate(static_cast<int32_t>(value)));
3145 } else {
3146 // Have to use the constant area.
3147 DCHECK(first.Equals(out));
3148 __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
3149 }
3150 } else if (second.IsRegister()) {
3151 DCHECK(first.Equals(out));
3152 __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3153 } else {
3154 DCHECK(second.IsDoubleStackSlot());
3155 DCHECK(first.Equals(out));
3156 __ imulq(first.AsRegister<CpuRegister>(),
3157 Address(CpuRegister(RSP), second.GetStackIndex()));
3158 }
3159 break;
3160 }
3161
3162 case Primitive::kPrimFloat: {
3163 DCHECK(first.Equals(out));
3164 if (second.IsFpuRegister()) {
3165 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3166 } else if (second.IsConstant()) {
3167 __ mulss(first.AsFpuRegister<XmmRegister>(),
3168 codegen_->LiteralFloatAddress(
3169 second.GetConstant()->AsFloatConstant()->GetValue()));
3170 } else {
3171 DCHECK(second.IsStackSlot());
3172 __ mulss(first.AsFpuRegister<XmmRegister>(),
3173 Address(CpuRegister(RSP), second.GetStackIndex()));
3174 }
3175 break;
3176 }
3177
3178 case Primitive::kPrimDouble: {
3179 DCHECK(first.Equals(out));
3180 if (second.IsFpuRegister()) {
3181 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3182 } else if (second.IsConstant()) {
3183 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3184 codegen_->LiteralDoubleAddress(
3185 second.GetConstant()->AsDoubleConstant()->GetValue()));
3186 } else {
3187 DCHECK(second.IsDoubleStackSlot());
3188 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3189 Address(CpuRegister(RSP), second.GetStackIndex()));
3190 }
3191 break;
3192 }
3193
3194 default:
3195 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3196 }
3197 }
3198
3199 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
3200 uint32_t stack_adjustment, bool is_float) {
3201 if (source.IsStackSlot()) {
3202 DCHECK(is_float);
3203 __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3204 } else if (source.IsDoubleStackSlot()) {
3205 DCHECK(!is_float);
3206 __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3207 } else {
3208 // Write the value to the temporary location on the stack and load to FP stack.
3209 if (is_float) {
3210 Location stack_temp = Location::StackSlot(temp_offset);
3211 codegen_->Move(stack_temp, source);
3212 __ flds(Address(CpuRegister(RSP), temp_offset));
3213 } else {
3214 Location stack_temp = Location::DoubleStackSlot(temp_offset);
3215 codegen_->Move(stack_temp, source);
3216 __ fldl(Address(CpuRegister(RSP), temp_offset));
3217 }
3218 }
3219 }
3220
3221 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
3222 Primitive::Type type = rem->GetResultType();
3223 bool is_float = type == Primitive::kPrimFloat;
3224 size_t elem_size = Primitive::ComponentSize(type);
3225 LocationSummary* locations = rem->GetLocations();
3226 Location first = locations->InAt(0);
3227 Location second = locations->InAt(1);
3228 Location out = locations->Out();
3229
3230 // Create stack space for 2 elements.
3231 // TODO: enhance register allocator to ask for stack temporaries.
3232 __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
3233
3234 // Load the values to the FP stack in reverse order, using temporaries if needed.
3235 PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
3236 PushOntoFPStack(first, 0, 2 * elem_size, is_float);
3237
3238 // Loop doing FPREM until we stabilize.
3239 NearLabel retry;
3240 __ Bind(&retry);
3241 __ fprem();
3242
3243 // Move FP status to AX.
3244 __ fstsw();
3245
3246 // And see if the argument reduction is complete. This is signaled by the
3247 // C2 FPU flag bit set to 0.
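      // fprem only performs a partial reduction per iteration; bit C2 (bit 10) of the
      // x87 status word remains set while the reduction is incomplete, so the masked
      // test below keeps looping until C2 clears.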
3248 __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
3249 __ j(kNotEqual, &retry);
3250
3251 // We have settled on the final value. Retrieve it into an XMM register.
3252 // Store FP top of stack to real stack.
3253 if (is_float) {
3254 __ fsts(Address(CpuRegister(RSP), 0));
3255 } else {
3256 __ fstl(Address(CpuRegister(RSP), 0));
3257 }
3258
3259 // Pop the 2 items from the FP stack.
3260 __ fucompp();
3261
3262 // Load the value from the stack into an XMM register.
3263 DCHECK(out.IsFpuRegister()) << out;
3264 if (is_float) {
3265 __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3266 } else {
3267 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3268 }
3269
3270 // And remove the temporary stack space we allocated.
3271 __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
3272 }
3273
3274 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3275 DCHECK(instruction->IsDiv() || instruction->IsRem());
3276
3277 LocationSummary* locations = instruction->GetLocations();
3278 Location second = locations->InAt(1);
3279 DCHECK(second.IsConstant());
3280
3281 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3282 CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
3283 int64_t imm = Int64FromConstant(second.GetConstant());
3284
3285 DCHECK(imm == 1 || imm == -1);
3286
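  // No division instruction is needed here: x % (+/-1) == 0, x / 1 == x and
  // x / -1 == -x, so the cases below reduce to an xor/move plus an optional neg.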
3287 switch (instruction->GetResultType()) {
3288 case Primitive::kPrimInt: {
3289 if (instruction->IsRem()) {
3290 __ xorl(output_register, output_register);
3291 } else {
3292 __ movl(output_register, input_register);
3293 if (imm == -1) {
3294 __ negl(output_register);
3295 }
3296 }
3297 break;
3298 }
3299
3300 case Primitive::kPrimLong: {
3301 if (instruction->IsRem()) {
3302 __ xorl(output_register, output_register);
3303 } else {
3304 __ movq(output_register, input_register);
3305 if (imm == -1) {
3306 __ negq(output_register);
3307 }
3308 }
3309 break;
3310 }
3311
3312 default:
3313 LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
3314 }
3315 }
3316
3317 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
3318 LocationSummary* locations = instruction->GetLocations();
3319 Location second = locations->InAt(1);
3320
3321 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3322 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3323
3324 int64_t imm = Int64FromConstant(second.GetConstant());
3325 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3326 uint64_t abs_imm = AbsOrMin(imm);
3327
3328 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
3329
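  // Signed division by a power of two must round toward zero, while an arithmetic
  // shift alone rounds toward negative infinity. Biasing negative numerators by
  // (abs_imm - 1) first (selected with the cmov) fixes the rounding; a final neg
  // handles negative divisors.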
3330 if (instruction->GetResultType() == Primitive::kPrimInt) {
3331 __ leal(tmp, Address(numerator, abs_imm - 1));
3332 __ testl(numerator, numerator);
3333 __ cmov(kGreaterEqual, tmp, numerator);
3334 int shift = CTZ(imm);
3335 __ sarl(tmp, Immediate(shift));
3336
3337 if (imm < 0) {
3338 __ negl(tmp);
3339 }
3340
3341 __ movl(output_register, tmp);
3342 } else {
3343 DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
3344 CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3345
3346 codegen_->Load64BitValue(rdx, abs_imm - 1);
3347 __ addq(rdx, numerator);
3348 __ testq(numerator, numerator);
3349 __ cmov(kGreaterEqual, rdx, numerator);
3350 int shift = CTZ(imm);
3351 __ sarq(rdx, Immediate(shift));
3352
3353 if (imm < 0) {
3354 __ negq(rdx);
3355 }
3356
3357 __ movq(output_register, rdx);
3358 }
3359 }
3360
3361 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3362 DCHECK(instruction->IsDiv() || instruction->IsRem());
3363
3364 LocationSummary* locations = instruction->GetLocations();
3365 Location second = locations->InAt(1);
3366
3367 CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
3368 : locations->GetTemp(0).AsRegister<CpuRegister>();
3369 CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
3370 CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
3371 : locations->Out().AsRegister<CpuRegister>();
3372 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3373
3374 DCHECK_EQ(RAX, eax.AsRegister());
3375 DCHECK_EQ(RDX, edx.AsRegister());
3376 if (instruction->IsDiv()) {
3377 DCHECK_EQ(RAX, out.AsRegister());
3378 } else {
3379 DCHECK_EQ(RDX, out.AsRegister());
3380 }
3381
3382 int64_t magic;
3383 int shift;
3384
3385 // TODO: can these branches be written as one?
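  // Both branches below implement reciprocal ("magic number") division: multiply the
  // numerator by the precomputed magic constant (imul leaves the high half in RDX),
  // correct the high half when the signs of the magic constant and the divisor differ,
  // arithmetic-shift by the precomputed amount, then add the sign bit so the quotient
  // rounds toward zero. For Rem, the remainder is recomputed as
  // numerator - quotient * divisor.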
3386 if (instruction->GetResultType() == Primitive::kPrimInt) {
3387 int imm = second.GetConstant()->AsIntConstant()->GetValue();
3388
3389 CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
3390
3391 __ movl(numerator, eax);
3392
3393 NearLabel no_div;
3394 NearLabel end;
3395 __ testl(eax, eax);
3396 __ j(kNotEqual, &no_div);
3397
3398 __ xorl(out, out);
3399 __ jmp(&end);
3400
3401 __ Bind(&no_div);
3402
3403 __ movl(eax, Immediate(magic));
3404 __ imull(numerator);
3405
3406 if (imm > 0 && magic < 0) {
3407 __ addl(edx, numerator);
3408 } else if (imm < 0 && magic > 0) {
3409 __ subl(edx, numerator);
3410 }
3411
3412 if (shift != 0) {
3413 __ sarl(edx, Immediate(shift));
3414 }
3415
3416 __ movl(eax, edx);
3417 __ shrl(edx, Immediate(31));
3418 __ addl(edx, eax);
3419
3420 if (instruction->IsRem()) {
3421 __ movl(eax, numerator);
3422 __ imull(edx, Immediate(imm));
3423 __ subl(eax, edx);
3424 __ movl(edx, eax);
3425 } else {
3426 __ movl(eax, edx);
3427 }
3428 __ Bind(&end);
3429 } else {
3430 int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
3431
3432 DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
3433
3434 CpuRegister rax = eax;
3435 CpuRegister rdx = edx;
3436
3437 CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift);
3438
3439 // Save the numerator.
3440 __ movq(numerator, rax);
3441
3442 // RAX = magic
3443 codegen_->Load64BitValue(rax, magic);
3444
3445 // RDX:RAX = magic * numerator
3446 __ imulq(numerator);
3447
3448 if (imm > 0 && magic < 0) {
3449 // RDX += numerator
3450 __ addq(rdx, numerator);
3451 } else if (imm < 0 && magic > 0) {
3452 // RDX -= numerator
3453 __ subq(rdx, numerator);
3454 }
3455
3456 // Shift if needed.
3457 if (shift != 0) {
3458 __ sarq(rdx, Immediate(shift));
3459 }
3460
3461 // RDX += 1 if RDX < 0
3462 __ movq(rax, rdx);
3463 __ shrq(rdx, Immediate(63));
3464 __ addq(rdx, rax);
3465
3466 if (instruction->IsRem()) {
3467 __ movq(rax, numerator);
3468
3469 if (IsInt<32>(imm)) {
3470 __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
3471 } else {
3472 __ imulq(rdx, codegen_->LiteralInt64Address(imm));
3473 }
3474
3475 __ subq(rax, rdx);
3476 __ movq(rdx, rax);
3477 } else {
3478 __ movq(rax, rdx);
3479 }
3480 }
3481 }
3482
3483 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3484 DCHECK(instruction->IsDiv() || instruction->IsRem());
3485 Primitive::Type type = instruction->GetResultType();
3486 DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
3487
3488 bool is_div = instruction->IsDiv();
3489 LocationSummary* locations = instruction->GetLocations();
3490
3491 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3492 Location second = locations->InAt(1);
3493
3494 DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
3495 DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
3496
3497 if (second.IsConstant()) {
3498 int64_t imm = Int64FromConstant(second.GetConstant());
3499
3500 if (imm == 0) {
3501 // Do not generate anything. DivZeroCheck would prevent any code from being executed.
3502 } else if (imm == 1 || imm == -1) {
3503 DivRemOneOrMinusOne(instruction);
3504 } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
3505 DivByPowerOfTwo(instruction->AsDiv());
3506 } else {
3507 DCHECK(imm <= -2 || imm >= 2);
3508 GenerateDivRemWithAnyConstant(instruction);
3509 }
3510 } else {
3511 SlowPathCode* slow_path =
3512 new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64(
3513 instruction, out.AsRegister(), type, is_div);
3514 codegen_->AddSlowPath(slow_path);
3515
3516 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3517 // 0x80000000(00000000)/-1 triggers an arithmetic exception!
3518 // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000),
3519 // so it's safe to just use negl instead of more complex comparisons.
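    // A zero divisor never reaches this point; it is caught earlier by HDivZeroCheck,
    // so -1 is the only divisor value that still needs a runtime check.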
3520 if (type == Primitive::kPrimInt) {
3521 __ cmpl(second_reg, Immediate(-1));
3522 __ j(kEqual, slow_path->GetEntryLabel());
3523 // edx:eax <- sign-extended of eax
3524 __ cdq();
3525 // eax = quotient, edx = remainder
3526 __ idivl(second_reg);
3527 } else {
3528 __ cmpq(second_reg, Immediate(-1));
3529 __ j(kEqual, slow_path->GetEntryLabel());
3530 // rdx:rax <- sign-extended of rax
3531 __ cqo();
3532 // rax = quotient, rdx = remainder
3533 __ idivq(second_reg);
3534 }
3535 __ Bind(slow_path->GetExitLabel());
3536 }
3537 }
3538
3539 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
3540 LocationSummary* locations =
3541 new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
3542 switch (div->GetResultType()) {
3543 case Primitive::kPrimInt:
3544 case Primitive::kPrimLong: {
3545 locations->SetInAt(0, Location::RegisterLocation(RAX));
3546 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3547 locations->SetOut(Location::SameAsFirstInput());
3548 // Intel uses edx:eax as the dividend.
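      // idivl/idivq consume edx:eax (rdx:rax for long) and leave the quotient in
      // eax/rax and the remainder in edx/rdx, hence the fixed RAX input here and the
      // RDX temporary.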
3549 locations->AddTemp(Location::RegisterLocation(RDX));
3550 // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
3551 // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
3552 // output and request another temp.
3553 if (div->InputAt(1)->IsConstant()) {
3554 locations->AddTemp(Location::RequiresRegister());
3555 }
3556 break;
3557 }
3558
3559 case Primitive::kPrimFloat:
3560 case Primitive::kPrimDouble: {
3561 locations->SetInAt(0, Location::RequiresFpuRegister());
3562 locations->SetInAt(1, Location::Any());
3563 locations->SetOut(Location::SameAsFirstInput());
3564 break;
3565 }
3566
3567 default:
3568 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3569 }
3570 }
3571
3572 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
3573 LocationSummary* locations = div->GetLocations();
3574 Location first = locations->InAt(0);
3575 Location second = locations->InAt(1);
3576 DCHECK(first.Equals(locations->Out()));
3577
3578 Primitive::Type type = div->GetResultType();
3579 switch (type) {
3580 case Primitive::kPrimInt:
3581 case Primitive::kPrimLong: {
3582 GenerateDivRemIntegral(div);
3583 break;
3584 }
3585
3586 case Primitive::kPrimFloat: {
3587 if (second.IsFpuRegister()) {
3588 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3589 } else if (second.IsConstant()) {
3590 __ divss(first.AsFpuRegister<XmmRegister>(),
3591 codegen_->LiteralFloatAddress(
3592 second.GetConstant()->AsFloatConstant()->GetValue()));
3593 } else {
3594 DCHECK(second.IsStackSlot());
3595 __ divss(first.AsFpuRegister<XmmRegister>(),
3596 Address(CpuRegister(RSP), second.GetStackIndex()));
3597 }
3598 break;
3599 }
3600
3601 case Primitive::kPrimDouble: {
3602 if (second.IsFpuRegister()) {
3603 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3604 } else if (second.IsConstant()) {
3605 __ divsd(first.AsFpuRegister<XmmRegister>(),
3606 codegen_->LiteralDoubleAddress(
3607 second.GetConstant()->AsDoubleConstant()->GetValue()));
3608 } else {
3609 DCHECK(second.IsDoubleStackSlot());
3610 __ divsd(first.AsFpuRegister<XmmRegister>(),
3611 Address(CpuRegister(RSP), second.GetStackIndex()));
3612 }
3613 break;
3614 }
3615
3616 default:
3617 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3618 }
3619 }
3620
3621 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
3622 Primitive::Type type = rem->GetResultType();
3623 LocationSummary* locations =
3624 new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
3625
3626 switch (type) {
3627 case Primitive::kPrimInt:
3628 case Primitive::kPrimLong: {
3629 locations->SetInAt(0, Location::RegisterLocation(RAX));
3630 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
3631 // Intel uses rdx:rax as the dividend and puts the remainder in rdx
3632 locations->SetOut(Location::RegisterLocation(RDX));
3633 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3634 // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
3635 // output and request another temp.
3636 if (rem->InputAt(1)->IsConstant()) {
3637 locations->AddTemp(Location::RequiresRegister());
3638 }
3639 break;
3640 }
3641
3642 case Primitive::kPrimFloat:
3643 case Primitive::kPrimDouble: {
3644 locations->SetInAt(0, Location::Any());
3645 locations->SetInAt(1, Location::Any());
3646 locations->SetOut(Location::RequiresFpuRegister());
3647 locations->AddTemp(Location::RegisterLocation(RAX));
3648 break;
3649 }
3650
3651 default:
3652 LOG(FATAL) << "Unexpected rem type " << type;
3653 }
3654 }
3655
3656 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
3657 Primitive::Type type = rem->GetResultType();
3658 switch (type) {
3659 case Primitive::kPrimInt:
3660 case Primitive::kPrimLong: {
3661 GenerateDivRemIntegral(rem);
3662 break;
3663 }
3664 case Primitive::kPrimFloat:
3665 case Primitive::kPrimDouble: {
3666 GenerateRemFP(rem);
3667 break;
3668 }
3669 default:
3670 LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
3671 }
3672 }
3673
3674 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3675 LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
3676 ? LocationSummary::kCallOnSlowPath
3677 : LocationSummary::kNoCall;
3678 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
3679 locations->SetInAt(0, Location::Any());
3680 if (instruction->HasUses()) {
3681 locations->SetOut(Location::SameAsFirstInput());
3682 }
3683 }
3684
3685 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3686 SlowPathCode* slow_path =
3687 new (GetGraph()->GetArena()) DivZeroCheckSlowPathX86_64(instruction);
3688 codegen_->AddSlowPath(slow_path);
3689
3690 LocationSummary* locations = instruction->GetLocations();
3691 Location value = locations->InAt(0);
3692
3693 switch (instruction->GetType()) {
3694 case Primitive::kPrimBoolean:
3695 case Primitive::kPrimByte:
3696 case Primitive::kPrimChar:
3697 case Primitive::kPrimShort:
3698 case Primitive::kPrimInt: {
3699 if (value.IsRegister()) {
3700 __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
3701 __ j(kEqual, slow_path->GetEntryLabel());
3702 } else if (value.IsStackSlot()) {
3703 __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
3704 __ j(kEqual, slow_path->GetEntryLabel());
3705 } else {
3706 DCHECK(value.IsConstant()) << value;
3707 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
3708 __ jmp(slow_path->GetEntryLabel());
3709 }
3710 }
3711 break;
3712 }
3713 case Primitive::kPrimLong: {
3714 if (value.IsRegister()) {
3715 __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
3716 __ j(kEqual, slow_path->GetEntryLabel());
3717 } else if (value.IsDoubleStackSlot()) {
3718 __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
3719 __ j(kEqual, slow_path->GetEntryLabel());
3720 } else {
3721 DCHECK(value.IsConstant()) << value;
3722 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
3723 __ jmp(slow_path->GetEntryLabel());
3724 }
3725 }
3726 break;
3727 }
3728 default:
3729 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
3730 }
3731 }
3732
3733 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
3734 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3735
3736 LocationSummary* locations =
3737 new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
3738
3739 switch (op->GetResultType()) {
3740 case Primitive::kPrimInt:
3741 case Primitive::kPrimLong: {
3742 locations->SetInAt(0, Location::RequiresRegister());
3743 // The shift count needs to be in CL.
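      // Hardware variable shifts take the count only in CL and mask it to 5 bits
      // (6 bits for 64-bit operands), which matches Java shift semantics; constant
      // counts are masked explicitly in the code generator instead.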
3744 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
3745 locations->SetOut(Location::SameAsFirstInput());
3746 break;
3747 }
3748 default:
3749 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
3750 }
3751 }
3752
3753 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
3754 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3755
3756 LocationSummary* locations = op->GetLocations();
3757 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
3758 Location second = locations->InAt(1);
3759
3760 switch (op->GetResultType()) {
3761 case Primitive::kPrimInt: {
3762 if (second.IsRegister()) {
3763 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3764 if (op->IsShl()) {
3765 __ shll(first_reg, second_reg);
3766 } else if (op->IsShr()) {
3767 __ sarl(first_reg, second_reg);
3768 } else {
3769 __ shrl(first_reg, second_reg);
3770 }
3771 } else {
3772 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
3773 if (op->IsShl()) {
3774 __ shll(first_reg, imm);
3775 } else if (op->IsShr()) {
3776 __ sarl(first_reg, imm);
3777 } else {
3778 __ shrl(first_reg, imm);
3779 }
3780 }
3781 break;
3782 }
3783 case Primitive::kPrimLong: {
3784 if (second.IsRegister()) {
3785 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3786 if (op->IsShl()) {
3787 __ shlq(first_reg, second_reg);
3788 } else if (op->IsShr()) {
3789 __ sarq(first_reg, second_reg);
3790 } else {
3791 __ shrq(first_reg, second_reg);
3792 }
3793 } else {
3794 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
3795 if (op->IsShl()) {
3796 __ shlq(first_reg, imm);
3797 } else if (op->IsShr()) {
3798 __ sarq(first_reg, imm);
3799 } else {
3800 __ shrq(first_reg, imm);
3801 }
3802 }
3803 break;
3804 }
3805 default:
3806 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
3807 UNREACHABLE();
3808 }
3809 }
3810
3811 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
3812 LocationSummary* locations =
3813 new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
3814
3815 switch (ror->GetResultType()) {
3816 case Primitive::kPrimInt:
3817 case Primitive::kPrimLong: {
3818 locations->SetInAt(0, Location::RequiresRegister());
3819 // The shift count needs to be in CL (unless it is a constant).
3820 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
3821 locations->SetOut(Location::SameAsFirstInput());
3822 break;
3823 }
3824 default:
3825 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
3826 UNREACHABLE();
3827 }
3828 }
3829
3830 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
3831 LocationSummary* locations = ror->GetLocations();
3832 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
3833 Location second = locations->InAt(1);
3834
3835 switch (ror->GetResultType()) {
3836 case Primitive::kPrimInt:
3837 if (second.IsRegister()) {
3838 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3839 __ rorl(first_reg, second_reg);
3840 } else {
3841 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
3842 __ rorl(first_reg, imm);
3843 }
3844 break;
3845 case Primitive::kPrimLong:
3846 if (second.IsRegister()) {
3847 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3848 __ rorq(first_reg, second_reg);
3849 } else {
3850 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
3851 __ rorq(first_reg, imm);
3852 }
3853 break;
3854 default:
3855 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
3856 UNREACHABLE();
3857 }
3858 }
3859
3860 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
3861 HandleShift(shl);
3862 }
3863
3864 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
3865 HandleShift(shl);
3866 }
3867
3868 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
3869 HandleShift(shr);
3870 }
3871
3872 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
3873 HandleShift(shr);
3874 }
3875
3876 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
3877 HandleShift(ushr);
3878 }
3879
3880 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
3881 HandleShift(ushr);
3882 }
3883
3884 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
3885 LocationSummary* locations =
3886 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
3887 InvokeRuntimeCallingConvention calling_convention;
3888 if (instruction->IsStringAlloc()) {
3889 locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
3890 } else {
3891 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
3892 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
3893 }
3894 locations->SetOut(Location::RegisterLocation(RAX));
3895 }
3896
3897 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
3898 // Note: if heap poisoning is enabled, the entry point takes care
3899 // of poisoning the reference.
3900 if (instruction->IsStringAlloc()) {
3901 // String is allocated through StringFactory. Call NewEmptyString entry point.
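    // The gs-relative load below fetches the ArtMethod* stashed in the pNewEmptyString
    // thread-local entrypoint slot; the call then goes through that method's
    // quick-compiled-code pointer at code_offset.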
3902 CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
3903 MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize);
3904 __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
3905 __ call(Address(temp, code_offset.SizeValue()));
3906 codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
3907 } else {
3908 codegen_->InvokeRuntime(instruction->GetEntrypoint(),
3909 instruction,
3910 instruction->GetDexPc(),
3911 nullptr);
3912 CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
3913 DCHECK(!codegen_->IsLeafMethod());
3914 }
3915 }
3916
3917 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
3918 LocationSummary* locations =
3919 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
3920 InvokeRuntimeCallingConvention calling_convention;
3921 locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
3922 locations->SetOut(Location::RegisterLocation(RAX));
3923 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
3924 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
3925 }
3926
3927 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
3928 InvokeRuntimeCallingConvention calling_convention;
3929 codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
3930 instruction->GetTypeIndex());
3931 // Note: if heap poisoning is enabled, the entry point takes care
3932 // of poisoning the reference.
3933 codegen_->InvokeRuntime(instruction->GetEntrypoint(),
3934 instruction,
3935 instruction->GetDexPc(),
3936 nullptr);
3937 CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
3938
3939 DCHECK(!codegen_->IsLeafMethod());
3940 }
3941
3942 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
3943 LocationSummary* locations =
3944 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
3945 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
3946 if (location.IsStackSlot()) {
3947 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
3948 } else if (location.IsDoubleStackSlot()) {
3949 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
3950 }
3951 locations->SetOut(location);
3952 }
3953
3954 void InstructionCodeGeneratorX86_64::VisitParameterValue(
3955 HParameterValue* instruction ATTRIBUTE_UNUSED) {
3956 // Nothing to do, the parameter is already at its location.
3957 }
3958
3959 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
3960 LocationSummary* locations =
3961 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
3962 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
3963 }
3964
3965 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
3966 HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
3967 // Nothing to do, the method is already at its location.
3968 }
3969
3970 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
3971 LocationSummary* locations =
3972 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
3973 locations->SetInAt(0, Location::RequiresRegister());
3974 locations->SetOut(Location::RequiresRegister());
3975 }
3976
3977 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
3978 LocationSummary* locations = instruction->GetLocations();
3979 uint32_t method_offset = 0;
3980 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
3981 method_offset = mirror::Class::EmbeddedVTableEntryOffset(
3982 instruction->GetIndex(), kX86_64PointerSize).SizeValue();
3983 } else {
3984 method_offset = mirror::Class::EmbeddedImTableEntryOffset(
3985 instruction->GetIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
3986 }
3987 __ movq(locations->Out().AsRegister<CpuRegister>(),
3988 Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
3989 }
3990
3991 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
3992 LocationSummary* locations =
3993 new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
3994 locations->SetInAt(0, Location::RequiresRegister());
3995 locations->SetOut(Location::SameAsFirstInput());
3996 }
3997
3998 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
3999 LocationSummary* locations = not_->GetLocations();
4000 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4001 locations->Out().AsRegister<CpuRegister>().AsRegister());
4002 Location out = locations->Out();
4003 switch (not_->GetResultType()) {
4004 case Primitive::kPrimInt:
4005 __ notl(out.AsRegister<CpuRegister>());
4006 break;
4007
4008 case Primitive::kPrimLong:
4009 __ notq(out.AsRegister<CpuRegister>());
4010 break;
4011
4012 default:
4013 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4014 }
4015 }
4016
4017 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4018 LocationSummary* locations =
4019 new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall);
4020 locations->SetInAt(0, Location::RequiresRegister());
4021 locations->SetOut(Location::SameAsFirstInput());
4022 }
4023
4024 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4025 LocationSummary* locations = bool_not->GetLocations();
4026 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4027 locations->Out().AsRegister<CpuRegister>().AsRegister());
4028 Location out = locations->Out();
4029 __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
4030 }
4031
4032 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
4033 LocationSummary* locations =
4034 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4035 for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
4036 locations->SetInAt(i, Location::Any());
4037 }
4038 locations->SetOut(Location::Any());
4039 }
4040
4041 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4042 LOG(FATAL) << "Unimplemented";
4043 }
4044
4045 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
4046 /*
4047 * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
4048 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
4049 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4050 */
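  // Consequently only kAnyAny emits a real fence below; MemoryFence() is expected to
  // lower to an mfence or an equivalent locked read-modify-write (an implementation
  // detail of the helper), while the remaining kinds intentionally emit nothing.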
4051 switch (kind) {
4052 case MemBarrierKind::kAnyAny: {
4053 MemoryFence();
4054 break;
4055 }
4056 case MemBarrierKind::kAnyStore:
4057 case MemBarrierKind::kLoadAny:
4058 case MemBarrierKind::kStoreStore: {
4059 // nop
4060 break;
4061 }
4062 default:
4063 LOG(FATAL) << "Unexpected memory barrier " << kind;
4064 }
4065 }
4066
4067 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
4068 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4069
4070 bool object_field_get_with_read_barrier =
4071 kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
4072 LocationSummary* locations =
4073 new (GetGraph()->GetArena()) LocationSummary(instruction,
4074 object_field_get_with_read_barrier ?
4075 LocationSummary::kCallOnSlowPath :
4076 LocationSummary::kNoCall);
4077 locations->SetInAt(0, Location::RequiresRegister());
4078 if (Primitive::IsFloatingPointType(instruction->GetType())) {
4079 locations->SetOut(Location::RequiresFpuRegister());
4080 } else {
4081 // The output overlaps for an object field get when read barriers
4082 // are enabled: we do not want the move to overwrite the object's
4083 // location, as we need it to emit the read barrier.
4084 locations->SetOut(
4085 Location::RequiresRegister(),
4086 object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4087 }
4088 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
4089 // We need a temporary register for the read barrier marking slow
4090 // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier.
4091 locations->AddTemp(Location::RequiresRegister());
4092 }
4093 }
4094
4095 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
4096 const FieldInfo& field_info) {
4097 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4098
4099 LocationSummary* locations = instruction->GetLocations();
4100 Location base_loc = locations->InAt(0);
4101 CpuRegister base = base_loc.AsRegister<CpuRegister>();
4102 Location out = locations->Out();
4103 bool is_volatile = field_info.IsVolatile();
4104 Primitive::Type field_type = field_info.GetFieldType();
4105 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4106
4107 switch (field_type) {
4108 case Primitive::kPrimBoolean: {
4109 __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4110 break;
4111 }
4112
4113 case Primitive::kPrimByte: {
4114 __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4115 break;
4116 }
4117
4118 case Primitive::kPrimShort: {
4119 __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4120 break;
4121 }
4122
4123 case Primitive::kPrimChar: {
4124 __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4125 break;
4126 }
4127
4128 case Primitive::kPrimInt: {
4129 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4130 break;
4131 }
4132
4133 case Primitive::kPrimNot: {
4134 // /* HeapReference<Object> */ out = *(base + offset)
4135 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4136 Location temp_loc = locations->GetTemp(0);
4137 // Note that a potential implicit null check is handled in this
4138 // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
4139 codegen_->GenerateFieldLoadWithBakerReadBarrier(
4140 instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
4141 if (is_volatile) {
4142 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4143 }
4144 } else {
4145 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4146 codegen_->MaybeRecordImplicitNullCheck(instruction);
4147 if (is_volatile) {
4148 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4149 }
4150 // If read barriers are enabled, emit read barriers other than
4151 // Baker's using a slow path (and also unpoison the loaded
4152 // reference, if heap poisoning is enabled).
4153 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
4154 }
4155 break;
4156 }
4157
4158 case Primitive::kPrimLong: {
4159 __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
4160 break;
4161 }
4162
4163 case Primitive::kPrimFloat: {
4164 __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4165 break;
4166 }
4167
4168 case Primitive::kPrimDouble: {
4169 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4170 break;
4171 }
4172
4173 case Primitive::kPrimVoid:
4174 LOG(FATAL) << "Unreachable type " << field_type;
4175 UNREACHABLE();
4176 }
4177
4178 if (field_type == Primitive::kPrimNot) {
4179 // Potential implicit null checks, in the case of reference
4180 // fields, are handled in the previous switch statement.
4181 } else {
4182 codegen_->MaybeRecordImplicitNullCheck(instruction);
4183 }
4184
4185 if (is_volatile) {
4186 if (field_type == Primitive::kPrimNot) {
4187 // Memory barriers, in the case of references, are also handled
4188 // in the previous switch statement.
4189 } else {
4190 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4191 }
4192 }
4193 }
4194
4195 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
4196 const FieldInfo& field_info) {
4197 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4198
4199 LocationSummary* locations =
4200 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4201 Primitive::Type field_type = field_info.GetFieldType();
4202 bool is_volatile = field_info.IsVolatile();
4203 bool needs_write_barrier =
4204 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
4205
4206 locations->SetInAt(0, Location::RequiresRegister());
4207 if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
4208 if (is_volatile) {
4209 // In order to satisfy the semantics of volatile, this must be a single instruction store.
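        // (x86-64 cannot store a full 64-bit immediate in one instruction, so only
        //  constants that are exactly representable as a sign-extended 32-bit
        //  immediate qualify; anything wider must go through a register.)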
4210 locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
4211 } else {
4212 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
4213 }
4214 } else {
4215 if (is_volatile) {
4216 // In order to satisfy the semantics of volatile, this must be a single instruction store.
4217 locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
4218 } else {
4219 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4220 }
4221 }
4222 if (needs_write_barrier) {
4223 // Temporary registers for the write barrier.
4224 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
4225 locations->AddTemp(Location::RequiresRegister());
4226 } else if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
4227 // Temporary register for the reference poisoning.
4228 locations->AddTemp(Location::RequiresRegister());
4229 }
4230 }
4231
4232 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
4233 const FieldInfo& field_info,
4234 bool value_can_be_null) {
4235 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4236
4237 LocationSummary* locations = instruction->GetLocations();
4238 CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
4239 Location value = locations->InAt(1);
4240 bool is_volatile = field_info.IsVolatile();
4241 Primitive::Type field_type = field_info.GetFieldType();
4242 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4243
4244 if (is_volatile) {
4245 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4246 }
4247
4248 bool maybe_record_implicit_null_check_done = false;
4249
4250 switch (field_type) {
4251 case Primitive::kPrimBoolean:
4252 case Primitive::kPrimByte: {
4253 if (value.IsConstant()) {
4254 int8_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4255 __ movb(Address(base, offset), Immediate(v));
4256 } else {
4257 __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
4258 }
4259 break;
4260 }
4261
4262 case Primitive::kPrimShort:
4263 case Primitive::kPrimChar: {
4264 if (value.IsConstant()) {
4265 int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4266 __ movw(Address(base, offset), Immediate(v));
4267 } else {
4268 __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
4269 }
4270 break;
4271 }
4272
4273 case Primitive::kPrimInt:
4274 case Primitive::kPrimNot: {
4275 if (value.IsConstant()) {
4276 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4277 // `field_type == Primitive::kPrimNot` implies `v == 0`.
4278 DCHECK((field_type != Primitive::kPrimNot) || (v == 0));
4279 // Note: if heap poisoning is enabled, no need to poison
4280 // (negate) `v` if it is a reference, as it would be null.
4281 __ movl(Address(base, offset), Immediate(v));
4282 } else {
4283 if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
4284 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4285 __ movl(temp, value.AsRegister<CpuRegister>());
4286 __ PoisonHeapReference(temp);
4287 __ movl(Address(base, offset), temp);
4288 } else {
4289 __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
4290 }
4291 }
4292 break;
4293 }
4294
4295 case Primitive::kPrimLong: {
4296 if (value.IsConstant()) {
4297 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
4298 codegen_->MoveInt64ToAddress(Address(base, offset),
4299 Address(base, offset + sizeof(int32_t)),
4300 v,
4301 instruction);
4302 maybe_record_implicit_null_check_done = true;
4303 } else {
4304 __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
4305 }
4306 break;
4307 }
4308
4309 case Primitive::kPrimFloat: {
4310 if (value.IsConstant()) {
4311 int32_t v =
4312 bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4313 __ movl(Address(base, offset), Immediate(v));
4314 } else {
4315 __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4316 }
4317 break;
4318 }
4319
4320 case Primitive::kPrimDouble: {
4321 if (value.IsConstant()) {
4322 int64_t v =
4323 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
4324 codegen_->MoveInt64ToAddress(Address(base, offset),
4325 Address(base, offset + sizeof(int32_t)),
4326 v,
4327 instruction);
4328 maybe_record_implicit_null_check_done = true;
4329 } else {
4330 __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4331 }
4332 break;
4333 }
4334
4335 case Primitive::kPrimVoid:
4336 LOG(FATAL) << "Unreachable type " << field_type;
4337 UNREACHABLE();
4338 }
4339
4340 if (!maybe_record_implicit_null_check_done) {
4341 codegen_->MaybeRecordImplicitNullCheck(instruction);
4342 }
4343
4344 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
4345 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4346 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4347 codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
4348 }
4349
4350 if (is_volatile) {
4351 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4352 }
4353 }
4354
4355 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4356 HandleFieldSet(instruction, instruction->GetFieldInfo());
4357 }
4358
4359 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4360 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4361 }
4362
4363 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4364 HandleFieldGet(instruction);
4365 }
4366
4367 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4368 HandleFieldGet(instruction, instruction->GetFieldInfo());
4369 }
4370
4371 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4372 HandleFieldGet(instruction);
4373 }
4374
4375 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4376 HandleFieldGet(instruction, instruction->GetFieldInfo());
4377 }
4378
4379 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4380 HandleFieldSet(instruction, instruction->GetFieldInfo());
4381 }
4382
4383 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4384 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4385 }
4386
4387 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
4388 HUnresolvedInstanceFieldGet* instruction) {
4389 FieldAccessCallingConventionX86_64 calling_convention;
4390 codegen_->CreateUnresolvedFieldLocationSummary(
4391 instruction, instruction->GetFieldType(), calling_convention);
4392 }
4393
4394 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
4395 HUnresolvedInstanceFieldGet* instruction) {
4396 FieldAccessCallingConventionX86_64 calling_convention;
4397 codegen_->GenerateUnresolvedFieldAccess(instruction,
4398 instruction->GetFieldType(),
4399 instruction->GetFieldIndex(),
4400 instruction->GetDexPc(),
4401 calling_convention);
4402 }
4403
4404 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
4405 HUnresolvedInstanceFieldSet* instruction) {
4406 FieldAccessCallingConventionX86_64 calling_convention;
4407 codegen_->CreateUnresolvedFieldLocationSummary(
4408 instruction, instruction->GetFieldType(), calling_convention);
4409 }
4410
4411 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
4412 HUnresolvedInstanceFieldSet* instruction) {
4413 FieldAccessCallingConventionX86_64 calling_convention;
4414 codegen_->GenerateUnresolvedFieldAccess(instruction,
4415 instruction->GetFieldType(),
4416 instruction->GetFieldIndex(),
4417 instruction->GetDexPc(),
4418 calling_convention);
4419 }
4420
4421 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
4422 HUnresolvedStaticFieldGet* instruction) {
4423 FieldAccessCallingConventionX86_64 calling_convention;
4424 codegen_->CreateUnresolvedFieldLocationSummary(
4425 instruction, instruction->GetFieldType(), calling_convention);
4426 }
4427
4428 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
4429 HUnresolvedStaticFieldGet* instruction) {
4430 FieldAccessCallingConventionX86_64 calling_convention;
4431 codegen_->GenerateUnresolvedFieldAccess(instruction,
4432 instruction->GetFieldType(),
4433 instruction->GetFieldIndex(),
4434 instruction->GetDexPc(),
4435 calling_convention);
4436 }
4437
4438 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
4439 HUnresolvedStaticFieldSet* instruction) {
4440 FieldAccessCallingConventionX86_64 calling_convention;
4441 codegen_->CreateUnresolvedFieldLocationSummary(
4442 instruction, instruction->GetFieldType(), calling_convention);
4443 }
4444
4445 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
4446 HUnresolvedStaticFieldSet* instruction) {
4447 FieldAccessCallingConventionX86_64 calling_convention;
4448 codegen_->GenerateUnresolvedFieldAccess(instruction,
4449 instruction->GetFieldType(),
4450 instruction->GetFieldIndex(),
4451 instruction->GetDexPc(),
4452 calling_convention);
4453 }
4454
4455 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
4456 LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
4457 ? LocationSummary::kCallOnSlowPath
4458 : LocationSummary::kNoCall;
4459 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
4460 Location loc = codegen_->IsImplicitNullCheckAllowed(instruction)
4461 ? Location::RequiresRegister()
4462 : Location::Any();
4463 locations->SetInAt(0, loc);
4464 if (instruction->HasUses()) {
4465 locations->SetOut(Location::SameAsFirstInput());
4466 }
4467 }
4468
4469 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
4470 if (CanMoveNullCheckToUser(instruction)) {
4471 return;
4472 }
4473 LocationSummary* locations = instruction->GetLocations();
4474 Location obj = locations->InAt(0);
4475
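// Touch the first word of the object: if `obj` is null this load faults, and the fault
// handler uses the pc info recorded below to throw a NullPointerException. RAX is an
// arbitrary destination; only the memory operand matters, and nothing but the flags is
// clobbered.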
4476 __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
4477 RecordPcInfo(instruction, instruction->GetDexPc());
4478 }
4479
4480 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
4481 SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86_64(instruction);
4482 AddSlowPath(slow_path);
4483
4484 LocationSummary* locations = instruction->GetLocations();
4485 Location obj = locations->InAt(0);
4486
4487 if (obj.IsRegister()) {
4488 __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
4489 } else if (obj.IsStackSlot()) {
4490 __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
4491 } else {
4492 DCHECK(obj.IsConstant()) << obj;
4493 DCHECK(obj.GetConstant()->IsNullConstant());
4494 __ jmp(slow_path->GetEntryLabel());
4495 return;
4496 }
4497 __ j(kEqual, slow_path->GetEntryLabel());
4498 }
4499
4500 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
4501 codegen_->GenerateNullCheck(instruction);
4502 }
4503
4504 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
4505 bool object_array_get_with_read_barrier =
4506 kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
4507 LocationSummary* locations =
4508 new (GetGraph()->GetArena()) LocationSummary(instruction,
4509 object_array_get_with_read_barrier ?
4510 LocationSummary::kCallOnSlowPath :
4511 LocationSummary::kNoCall);
4512 locations->SetInAt(0, Location::RequiresRegister());
4513 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4514 if (Primitive::IsFloatingPointType(instruction->GetType())) {
4515 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4516 } else {
4517 // The output overlaps for an object array get when read barriers
4518 // are enabled: we do not want the move to overwrite the array's
4519 // location, as we need it to emit the read barrier.
4520 locations->SetOut(
4521 Location::RequiresRegister(),
4522 object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4523 }
4524 // We need a temporary register for the read barrier marking slow
4525 // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
4526 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
4527 locations->AddTemp(Location::RequiresRegister());
4528 }
4529 }
4530
4531 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
4532 LocationSummary* locations = instruction->GetLocations();
4533 Location obj_loc = locations->InAt(0);
4534 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
4535 Location index = locations->InAt(1);
4536 Location out_loc = locations->Out();
4537
4538 Primitive::Type type = instruction->GetType();
4539 switch (type) {
4540 case Primitive::kPrimBoolean: {
4541 uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
4542 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4543 if (index.IsConstant()) {
4544 __ movzxb(out, Address(obj,
4545 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
4546 } else {
4547 __ movzxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset));
4548 }
4549 break;
4550 }
4551
4552 case Primitive::kPrimByte: {
4553 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
4554 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4555 if (index.IsConstant()) {
4556 __ movsxb(out, Address(obj,
4557 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
4558 } else {
4559 __ movsxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset));
4560 }
4561 break;
4562 }
4563
4564 case Primitive::kPrimShort: {
4565 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
4566 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4567 if (index.IsConstant()) {
4568 __ movsxw(out, Address(obj,
4569 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
4570 } else {
4571 __ movsxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset));
4572 }
4573 break;
4574 }
4575
4576 case Primitive::kPrimChar: {
4577 uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
4578 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4579 if (index.IsConstant()) {
4580 __ movzxw(out, Address(obj,
4581 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
4582 } else {
4583 __ movzxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset));
4584 }
4585 break;
4586 }
4587
4588 case Primitive::kPrimInt: {
4589 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4590 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4591 if (index.IsConstant()) {
4592 __ movl(out, Address(obj,
4593 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
4594 } else {
4595 __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
4596 }
4597 break;
4598 }
4599
4600 case Primitive::kPrimNot: {
4601 static_assert(
4602 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
4603 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
4604 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4605 // /* HeapReference<Object> */ out =
4606 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
4607 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4608 Location temp = locations->GetTemp(0);
4609 // Note that a potential implicit null check is handled in this
4610 // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
4611 codegen_->GenerateArrayLoadWithBakerReadBarrier(
4612 instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
4613 } else {
4614 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4615 if (index.IsConstant()) {
4616 uint32_t offset =
4617 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
4618 __ movl(out, Address(obj, offset));
4619 codegen_->MaybeRecordImplicitNullCheck(instruction);
4620 // If read barriers are enabled, emit read barriers other than
4621 // Baker's using a slow path (and also unpoison the loaded
4622 // reference, if heap poisoning is enabled).
4623 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
4624 } else {
4625 __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
4626 codegen_->MaybeRecordImplicitNullCheck(instruction);
4627 // If read barriers are enabled, emit read barriers other than
4628 // Baker's using a slow path (and also unpoison the loaded
4629 // reference, if heap poisoning is enabled).
4630 codegen_->MaybeGenerateReadBarrierSlow(
4631 instruction, out_loc, out_loc, obj_loc, data_offset, index);
4632 }
4633 }
4634 break;
4635 }
4636
4637 case Primitive::kPrimLong: {
4638 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
4639 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4640 if (index.IsConstant()) {
4641 __ movq(out, Address(obj,
4642 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
4643 } else {
4644 __ movq(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset));
4645 }
4646 break;
4647 }
4648
4649 case Primitive::kPrimFloat: {
4650 uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
4651 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4652 if (index.IsConstant()) {
4653 __ movss(out, Address(obj,
4654 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
4655 } else {
4656 __ movss(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
4657 }
4658 break;
4659 }
4660
4661 case Primitive::kPrimDouble: {
4662 uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
4663 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4664 if (index.IsConstant()) {
4665 __ movsd(out, Address(obj,
4666 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
4667 } else {
4668 __ movsd(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset));
4669 }
4670 break;
4671 }
4672
4673 case Primitive::kPrimVoid:
4674 LOG(FATAL) << "Unreachable type " << type;
4675 UNREACHABLE();
4676 }
4677
4678 if (type == Primitive::kPrimNot) {
4679 // Potential implicit null checks, in the case of reference
4680 // arrays, are handled in the previous switch statement.
4681 } else {
4682 codegen_->MaybeRecordImplicitNullCheck(instruction);
4683 }
4684 }
4685
4686 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
4687 Primitive::Type value_type = instruction->GetComponentType();
4688
4689 bool needs_write_barrier =
4690 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
4691 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
4692 bool object_array_set_with_read_barrier =
4693 kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
4694
4695 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
4696 instruction,
4697 (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
4698 LocationSummary::kCallOnSlowPath :
4699 LocationSummary::kNoCall);
4700
4701 locations->SetInAt(0, Location::RequiresRegister());
4702 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4703 if (Primitive::IsFloatingPointType(value_type)) {
4704 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
4705 } else {
4706 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
4707 }
4708
4709 if (needs_write_barrier) {
4710 // Temporary registers for the write barrier.
4711
4712 // This first temporary register is possibly used for heap
4713 // reference poisoning and/or read barrier emission too.
4714 locations->AddTemp(Location::RequiresRegister());
4715 locations->AddTemp(Location::RequiresRegister());
4716 }
4717 }
4718
4719 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
4720 LocationSummary* locations = instruction->GetLocations();
4721 Location array_loc = locations->InAt(0);
4722 CpuRegister array = array_loc.AsRegister<CpuRegister>();
4723 Location index = locations->InAt(1);
4724 Location value = locations->InAt(2);
4725 Primitive::Type value_type = instruction->GetComponentType();
4726 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
4727 bool needs_write_barrier =
4728 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
4729 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4730 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4731 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4732
4733 switch (value_type) {
4734 case Primitive::kPrimBoolean:
4735 case Primitive::kPrimByte: {
4736 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
4737 Address address = index.IsConstant()
4738 ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + offset)
4739 : Address(array, index.AsRegister<CpuRegister>(), TIMES_1, offset);
4740 if (value.IsRegister()) {
4741 __ movb(address, value.AsRegister<CpuRegister>());
4742 } else {
4743 __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
4744 }
4745 codegen_->MaybeRecordImplicitNullCheck(instruction);
4746 break;
4747 }
4748
4749 case Primitive::kPrimShort:
4750 case Primitive::kPrimChar: {
4751 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
4752 Address address = index.IsConstant()
4753 ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + offset)
4754 : Address(array, index.AsRegister<CpuRegister>(), TIMES_2, offset);
4755 if (value.IsRegister()) {
4756 __ movw(address, value.AsRegister<CpuRegister>());
4757 } else {
4758 DCHECK(value.IsConstant()) << value;
4759 __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
4760 }
4761 codegen_->MaybeRecordImplicitNullCheck(instruction);
4762 break;
4763 }
4764
4765 case Primitive::kPrimNot: {
4766 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4767 Address address = index.IsConstant()
4768 ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
4769 : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
4770
4771 if (!value.IsRegister()) {
4772 // Just setting null.
4773 DCHECK(instruction->InputAt(2)->IsNullConstant());
4774 DCHECK(value.IsConstant()) << value;
4775 __ movl(address, Immediate(0));
4776 codegen_->MaybeRecordImplicitNullCheck(instruction);
4777 DCHECK(!needs_write_barrier);
4778 DCHECK(!may_need_runtime_call_for_type_check);
4779 break;
4780 }
4781
4782 DCHECK(needs_write_barrier);
4783 CpuRegister register_value = value.AsRegister<CpuRegister>();
4784 NearLabel done, not_null, do_put;
4785 SlowPathCode* slow_path = nullptr;
4786 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4787 if (may_need_runtime_call_for_type_check) {
4788 slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
4789 codegen_->AddSlowPath(slow_path);
4790 if (instruction->GetValueCanBeNull()) {
4791 __ testl(register_value, register_value);
4792 __ j(kNotEqual, &not_null);
4793 __ movl(address, Immediate(0));
4794 codegen_->MaybeRecordImplicitNullCheck(instruction);
4795 __ jmp(&done);
4796 __ Bind(&not_null);
4797 }
4798
4799 if (kEmitCompilerReadBarrier) {
4800 // When read barriers are enabled, the type checking
4801 // instrumentation requires two read barriers:
4802 //
4803 // __ movl(temp2, temp);
4804 // // /* HeapReference<Class> */ temp = temp->component_type_
4805 // __ movl(temp, Address(temp, component_offset));
4806 // codegen_->GenerateReadBarrierSlow(
4807 // instruction, temp_loc, temp_loc, temp2_loc, component_offset);
4808 //
4809 // // /* HeapReference<Class> */ temp2 = register_value->klass_
4810 // __ movl(temp2, Address(register_value, class_offset));
4811 // codegen_->GenerateReadBarrierSlow(
4812 // instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
4813 //
4814 // __ cmpl(temp, temp2);
4815 //
4816 // However, the second read barrier may trash `temp`, as it
4817 // is a temporary register, and as such would not be saved
4818 // along with live registers before calling the runtime (nor
4819 // restored afterwards). So in this case, we bail out and
4820 // delegate the work to the array set slow path.
4821 //
4822 // TODO: Extend the register allocator to support a new
4823 // "(locally) live temp" location so as to avoid always
4824 // going into the slow path when read barriers are enabled.
4825 __ jmp(slow_path->GetEntryLabel());
4826 } else {
4827 // /* HeapReference<Class> */ temp = array->klass_
4828 __ movl(temp, Address(array, class_offset));
4829 codegen_->MaybeRecordImplicitNullCheck(instruction);
4830 __ MaybeUnpoisonHeapReference(temp);
4831
4832 // /* HeapReference<Class> */ temp = temp->component_type_
4833 __ movl(temp, Address(temp, component_offset));
4834 // If heap poisoning is enabled, no need to unpoison `temp`
4835 // nor the object reference in `register_value->klass`, as
4836 // we are comparing two poisoned references.
4837 __ cmpl(temp, Address(register_value, class_offset));
4838
4839 if (instruction->StaticTypeOfArrayIsObjectArray()) {
4840 __ j(kEqual, &do_put);
4841 // If heap poisoning is enabled, the `temp` reference has
4842 // not been unpoisoned yet; unpoison it now.
4843 __ MaybeUnpoisonHeapReference(temp);
4844
4845 // /* HeapReference<Class> */ temp = temp->super_class_
4846 __ movl(temp, Address(temp, super_offset));
4847 // If heap poisoning is enabled, no need to unpoison
4848 // `temp`, as we are comparing against null below.
4849 __ testl(temp, temp);
4850 __ j(kNotEqual, slow_path->GetEntryLabel());
4851 __ Bind(&do_put);
4852 } else {
4853 __ j(kNotEqual, slow_path->GetEntryLabel());
4854 }
4855 }
4856 }
4857
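// Store the (possibly poisoned) reference. With heap poisoning enabled, the value is
// poisoned in a temporary so that the input register keeps the original reference.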
4858 if (kPoisonHeapReferences) {
4859 __ movl(temp, register_value);
4860 __ PoisonHeapReference(temp);
4861 __ movl(address, temp);
4862 } else {
4863 __ movl(address, register_value);
4864 }
4865 if (!may_need_runtime_call_for_type_check) {
4866 codegen_->MaybeRecordImplicitNullCheck(instruction);
4867 }
4868
4869 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4870 codegen_->MarkGCCard(
4871 temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull());
4872 __ Bind(&done);
4873
4874 if (slow_path != nullptr) {
4875 __ Bind(slow_path->GetExitLabel());
4876 }
4877
4878 break;
4879 }
4880
4881 case Primitive::kPrimInt: {
4882 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4883 Address address = index.IsConstant()
4884 ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
4885 : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
4886 if (value.IsRegister()) {
4887 __ movl(address, value.AsRegister<CpuRegister>());
4888 } else {
4889 DCHECK(value.IsConstant()) << value;
4890 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4891 __ movl(address, Immediate(v));
4892 }
4893 codegen_->MaybeRecordImplicitNullCheck(instruction);
4894 break;
4895 }
4896
4897 case Primitive::kPrimLong: {
4898 uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
4899 Address address = index.IsConstant()
4900 ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
4901 : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
4902 if (value.IsRegister()) {
4903 __ movq(address, value.AsRegister<CpuRegister>());
4904 codegen_->MaybeRecordImplicitNullCheck(instruction);
4905 } else {
4906 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
4907 Address address_high = index.IsConstant()
4908 ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
4909 offset + sizeof(int32_t))
4910 : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
4911 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
4912 }
4913 break;
4914 }
4915
4916 case Primitive::kPrimFloat: {
4917 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
4918 Address address = index.IsConstant()
4919 ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
4920 : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
4921 if (value.IsFpuRegister()) {
4922 __ movss(address, value.AsFpuRegister<XmmRegister>());
4923 } else {
4924 DCHECK(value.IsConstant());
4925 int32_t v =
4926 bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4927 __ movl(address, Immediate(v));
4928 }
4929 codegen_->MaybeRecordImplicitNullCheck(instruction);
4930 break;
4931 }
4932
4933 case Primitive::kPrimDouble: {
4934 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
4935 Address address = index.IsConstant()
4936 ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
4937 : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
4938 if (value.IsFpuRegister()) {
4939 __ movsd(address, value.AsFpuRegister<XmmRegister>());
4940 codegen_->MaybeRecordImplicitNullCheck(instruction);
4941 } else {
4942 int64_t v =
4943 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
4944 Address address_high = index.IsConstant()
4945 ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
4946 offset + sizeof(int32_t))
4947 : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
4948 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
4949 }
4950 break;
4951 }
4952
4953 case Primitive::kPrimVoid:
4954 LOG(FATAL) << "Unreachable type " << instruction->GetType();
4955 UNREACHABLE();
4956 }
4957 }
4958
4959 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
4960 LocationSummary* locations =
4961 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4962 locations->SetInAt(0, Location::RequiresRegister());
4963 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4964 }
4965
4966 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
4967 LocationSummary* locations = instruction->GetLocations();
4968 uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
4969 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
4970 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4971 __ movl(out, Address(obj, offset));
4972 codegen_->MaybeRecordImplicitNullCheck(instruction);
4973 }
4974
4975 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
4976 LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
4977 ? LocationSummary::kCallOnSlowPath
4978 : LocationSummary::kNoCall;
4979 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
4980 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
4981 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4982 if (instruction->HasUses()) {
4983 locations->SetOut(Location::SameAsFirstInput());
4984 }
4985 }
4986
4987 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
4988 LocationSummary* locations = instruction->GetLocations();
4989 Location index_loc = locations->InAt(0);
4990 Location length_loc = locations->InAt(1);
4991 SlowPathCode* slow_path =
4992 new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
4993
4994 if (length_loc.IsConstant()) {
4995 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
4996 if (index_loc.IsConstant()) {
4997 // BCE will remove the bounds check if we are guaranteed to pass.
4998 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
4999 if (index < 0 || index >= length) {
5000 codegen_->AddSlowPath(slow_path);
5001 __ jmp(slow_path->GetEntryLabel());
5002 } else {
5003 // Some optimization after BCE may have generated this, and we should not
5004 // generate a bounds check if it is a valid range.
5005 }
5006 return;
5007 }
5008
5009 // We have to reverse the jump condition because the length is the constant.
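// The comparison below is unsigned (kAboveEqual), so a negative index is treated as a
// very large value and also takes the slow path; no separate `index < 0` check is needed.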
5010 CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
5011 __ cmpl(index_reg, Immediate(length));
5012 codegen_->AddSlowPath(slow_path);
5013 __ j(kAboveEqual, slow_path->GetEntryLabel());
5014 } else {
5015 CpuRegister length = length_loc.AsRegister<CpuRegister>();
5016 if (index_loc.IsConstant()) {
5017 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5018 __ cmpl(length, Immediate(value));
5019 } else {
5020 __ cmpl(length, index_loc.AsRegister<CpuRegister>());
5021 }
5022 codegen_->AddSlowPath(slow_path);
5023 __ j(kBelowEqual, slow_path->GetEntryLabel());
5024 }
5025 }
5026
5027 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
5028 CpuRegister card,
5029 CpuRegister object,
5030 CpuRegister value,
5031 bool value_can_be_null) {
5032 NearLabel is_null;
5033 if (value_can_be_null) {
5034 __ testl(value, value);
5035 __ j(kEqual, &is_null);
5036 }
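// Load the card table's biased base from the Thread (via the GS segment), index it with
// `object >> kCardShift`, and dirty the card with a single byte store. The byte stored is
// the low byte of the base register itself; the card table base is assumed to be aligned
// so that this byte equals the dirty-card value, which avoids materializing a constant.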
5037 __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64WordSize>().Int32Value(),
5038 /* no_rip */ true));
5039 __ movq(temp, object);
5040 __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
5041 __ movb(Address(temp, card, TIMES_1, 0), card);
5042 if (value_can_be_null) {
5043 __ Bind(&is_null);
5044 }
5045 }
5046
5047 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5048 LOG(FATAL) << "Unimplemented";
5049 }
5050
5051 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
5052 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5053 }
5054
5055 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5056 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
5057 }
5058
5059 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5060 HBasicBlock* block = instruction->GetBlock();
5061 if (block->GetLoopInformation() != nullptr) {
5062 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5063 // The back edge will generate the suspend check.
5064 return;
5065 }
5066 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5067 // The goto will generate the suspend check.
5068 return;
5069 }
5070 GenerateSuspendCheck(instruction, nullptr);
5071 }
5072
5073 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
5074 HBasicBlock* successor) {
5075 SuspendCheckSlowPathX86_64* slow_path =
5076 down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
5077 if (slow_path == nullptr) {
5078 slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
5079 instruction->SetSlowPath(slow_path);
5080 codegen_->AddSlowPath(slow_path);
5081 if (successor != nullptr) {
5082 DCHECK(successor->IsLoopHeader());
5083 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
5084 }
5085 } else {
5086 DCHECK_EQ(slow_path->GetSuccessor(), successor);
5087 }
5088
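// Poll the thread's flag word in thread-local storage (via the GS segment). A non-zero
// value indicates a pending suspend or checkpoint request, which is handled by the
// slow path's call into the runtime.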
5089 __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(),
5090 /* no_rip */ true),
5091 Immediate(0));
5092 if (successor == nullptr) {
5093 __ j(kNotEqual, slow_path->GetEntryLabel());
5094 __ Bind(slow_path->GetReturnLabel());
5095 } else {
5096 __ j(kEqual, codegen_->GetLabelOf(successor));
5097 __ jmp(slow_path->GetEntryLabel());
5098 }
5099 }
5100
5101 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
5102 return codegen_->GetAssembler();
5103 }
5104
5105 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
5106 MoveOperands* move = moves_[index];
5107 Location source = move->GetSource();
5108 Location destination = move->GetDestination();
5109
5110 if (source.IsRegister()) {
5111 if (destination.IsRegister()) {
5112 __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
5113 } else if (destination.IsStackSlot()) {
5114 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
5115 source.AsRegister<CpuRegister>());
5116 } else {
5117 DCHECK(destination.IsDoubleStackSlot());
5118 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
5119 source.AsRegister<CpuRegister>());
5120 }
5121 } else if (source.IsStackSlot()) {
5122 if (destination.IsRegister()) {
5123 __ movl(destination.AsRegister<CpuRegister>(),
5124 Address(CpuRegister(RSP), source.GetStackIndex()));
5125 } else if (destination.IsFpuRegister()) {
5126 __ movss(destination.AsFpuRegister<XmmRegister>(),
5127 Address(CpuRegister(RSP), source.GetStackIndex()));
5128 } else {
5129 DCHECK(destination.IsStackSlot());
5130 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5131 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5132 }
5133 } else if (source.IsDoubleStackSlot()) {
5134 if (destination.IsRegister()) {
5135 __ movq(destination.AsRegister<CpuRegister>(),
5136 Address(CpuRegister(RSP), source.GetStackIndex()));
5137 } else if (destination.IsFpuRegister()) {
5138 __ movsd(destination.AsFpuRegister<XmmRegister>(),
5139 Address(CpuRegister(RSP), source.GetStackIndex()));
5140 } else {
5141 DCHECK(destination.IsDoubleStackSlot()) << destination;
5142 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5143 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5144 }
5145 } else if (source.IsConstant()) {
5146 HConstant* constant = source.GetConstant();
5147 if (constant->IsIntConstant() || constant->IsNullConstant()) {
5148 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
5149 if (destination.IsRegister()) {
5150 if (value == 0) {
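// Prefer `xor reg, reg` for zeroing: it is shorter than moving an immediate zero and is
// recognized as a zeroing idiom by the hardware.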
5151 __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5152 } else {
5153 __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
5154 }
5155 } else {
5156 DCHECK(destination.IsStackSlot()) << destination;
5157 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
5158 }
5159 } else if (constant->IsLongConstant()) {
5160 int64_t value = constant->AsLongConstant()->GetValue();
5161 if (destination.IsRegister()) {
5162 codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
5163 } else {
5164 DCHECK(destination.IsDoubleStackSlot()) << destination;
5165 codegen_->Store64BitValueToStack(destination, value);
5166 }
5167 } else if (constant->IsFloatConstant()) {
5168 float fp_value = constant->AsFloatConstant()->GetValue();
5169 if (destination.IsFpuRegister()) {
5170 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5171 codegen_->Load32BitValue(dest, fp_value);
5172 } else {
5173 DCHECK(destination.IsStackSlot()) << destination;
5174 Immediate imm(bit_cast<int32_t, float>(fp_value));
5175 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
5176 }
5177 } else {
5178 DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
5179 double fp_value = constant->AsDoubleConstant()->GetValue();
5180 int64_t value = bit_cast<int64_t, double>(fp_value);
5181 if (destination.IsFpuRegister()) {
5182 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5183 codegen_->Load64BitValue(dest, fp_value);
5184 } else {
5185 DCHECK(destination.IsDoubleStackSlot()) << destination;
5186 codegen_->Store64BitValueToStack(destination, value);
5187 }
5188 }
5189 } else if (source.IsFpuRegister()) {
5190 if (destination.IsFpuRegister()) {
5191 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5192 } else if (destination.IsStackSlot()) {
5193 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
5194 source.AsFpuRegister<XmmRegister>());
5195 } else {
5196 DCHECK(destination.IsDoubleStackSlot()) << destination;
5197 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
5198 source.AsFpuRegister<XmmRegister>());
5199 }
5200 }
5201 }
5202
5203 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
5204 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5205 __ movl(Address(CpuRegister(RSP), mem), reg);
5206 __ movl(reg, CpuRegister(TMP));
5207 }
5208
5209 void ParallelMoveResolverX86_64::Exchange32(int mem1, int mem2) {
5210 ScratchRegisterScope ensure_scratch(
5211 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5212
5213 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
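// If the scratch register had to be spilled, SpillScratch pushed it and RSP moved down
// by one word, so both memory operands must be rebased by kX86_64WordSize.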
5214 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5215 __ movl(CpuRegister(ensure_scratch.GetRegister()),
5216 Address(CpuRegister(RSP), mem2 + stack_offset));
5217 __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5218 __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
5219 CpuRegister(ensure_scratch.GetRegister()));
5220 }
5221
5222 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
5223 __ movq(CpuRegister(TMP), reg1);
5224 __ movq(reg1, reg2);
5225 __ movq(reg2, CpuRegister(TMP));
5226 }
5227
5228 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
5229 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5230 __ movq(Address(CpuRegister(RSP), mem), reg);
5231 __ movq(reg, CpuRegister(TMP));
5232 }
5233
5234 void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
5235 ScratchRegisterScope ensure_scratch(
5236 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5237
5238 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5239 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5240 __ movq(CpuRegister(ensure_scratch.GetRegister()),
5241 Address(CpuRegister(RSP), mem2 + stack_offset));
5242 __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5243 __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
5244 CpuRegister(ensure_scratch.GetRegister()));
5245 }
5246
5247 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
5248 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5249 __ movss(Address(CpuRegister(RSP), mem), reg);
5250 __ movd(reg, CpuRegister(TMP));
5251 }
5252
5253 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
5254 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5255 __ movsd(Address(CpuRegister(RSP), mem), reg);
5256 __ movd(reg, CpuRegister(TMP));
5257 }
5258
5259 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
5260 MoveOperands* move = moves_[index];
5261 Location source = move->GetSource();
5262 Location destination = move->GetDestination();
5263
5264 if (source.IsRegister() && destination.IsRegister()) {
5265 Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5266 } else if (source.IsRegister() && destination.IsStackSlot()) {
5267 Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5268 } else if (source.IsStackSlot() && destination.IsRegister()) {
5269 Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5270 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
5271 Exchange32(destination.GetStackIndex(), source.GetStackIndex());
5272 } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
5273 Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5274 } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
5275 Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5276 } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
5277 Exchange64(destination.GetStackIndex(), source.GetStackIndex());
5278 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
5279 __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
5280 __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
5281 __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
5282 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
5283 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5284 } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
5285 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5286 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
5287 Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5288 } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
5289 Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5290 } else {
5291 LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
5292 }
5293 }
5294
5295
5296 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
5297 __ pushq(CpuRegister(reg));
5298 }
5299
5300
5301 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
5302 __ popq(CpuRegister(reg));
5303 }
5304
5305 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
5306 SlowPathCode* slow_path, CpuRegister class_reg) {
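// Class status values are ordered, so the signed compare below sends any class whose
// status is below kStatusInitialized (including erroneous classes, whose status is
// negative) to the slow path, which re-checks and performs initialization at runtime.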
5307 __ cmpl(Address(class_reg, mirror::Class::StatusOffset().Int32Value()),
5308 Immediate(mirror::Class::kStatusInitialized));
5309 __ j(kLess, slow_path->GetEntryLabel());
5310 __ Bind(slow_path->GetExitLabel());
5311 // No need for memory fence, thanks to the x86-64 memory model.
5312 }
5313
5314 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
5315 InvokeRuntimeCallingConvention calling_convention;
5316 CodeGenerator::CreateLoadClassLocationSummary(
5317 cls,
5318 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
5319 Location::RegisterLocation(RAX),
5320 /* code_generator_supports_read_barrier */ true);
5321 }
5322
5323 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
5324 LocationSummary* locations = cls->GetLocations();
5325 if (cls->NeedsAccessCheck()) {
5326 codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
5327 codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
5328 cls,
5329 cls->GetDexPc(),
5330 nullptr);
5331 CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
5332 return;
5333 }
5334
5335 Location out_loc = locations->Out();
5336 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5337 CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
5338
5339 if (cls->IsReferrersClass()) {
5340 DCHECK(!cls->CanCallRuntime());
5341 DCHECK(!cls->MustGenerateClinitCheck());
5342 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5343 GenerateGcRootFieldLoad(
5344 cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
5345 } else {
5346 // /* GcRoot<mirror::Class>[] */ out =
5347 // current_method.ptr_sized_fields_->dex_cache_resolved_types_
5348 __ movq(out, Address(current_method,
5349 ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
5350 // /* GcRoot<mirror::Class> */ out = out[type_index]
5351 GenerateGcRootFieldLoad(
5352 cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
5353
5354 if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
5355 DCHECK(cls->CanCallRuntime());
5356 SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
5357 cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
5358 codegen_->AddSlowPath(slow_path);
5359 if (!cls->IsInDexCache()) {
5360 __ testl(out, out);
5361 __ j(kEqual, slow_path->GetEntryLabel());
5362 }
5363 if (cls->MustGenerateClinitCheck()) {
5364 GenerateClassInitializationCheck(slow_path, out);
5365 } else {
5366 __ Bind(slow_path->GetExitLabel());
5367 }
5368 }
5369 }
5370 }
5371
5372 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
5373 LocationSummary* locations =
5374 new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
5375 locations->SetInAt(0, Location::RequiresRegister());
5376 if (check->HasUses()) {
5377 locations->SetOut(Location::SameAsFirstInput());
5378 }
5379 }
5380
5381 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
5382 // We assume the class is not null.
5383 SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
5384 check->GetLoadClass(), check, check->GetDexPc(), true);
5385 codegen_->AddSlowPath(slow_path);
5386 GenerateClassInitializationCheck(slow_path,
5387 check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
5388 }
5389
5390 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
5391 HLoadString::LoadKind desired_string_load_kind) {
5392 if (kEmitCompilerReadBarrier) {
5393 switch (desired_string_load_kind) {
5394 case HLoadString::LoadKind::kBootImageLinkTimeAddress:
5395 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5396 case HLoadString::LoadKind::kBootImageAddress:
5397 // TODO: Implement for read barrier.
5398 return HLoadString::LoadKind::kDexCacheViaMethod;
5399 default:
5400 break;
5401 }
5402 }
5403 switch (desired_string_load_kind) {
5404 case HLoadString::LoadKind::kBootImageLinkTimeAddress:
5405 DCHECK(!GetCompilerOptions().GetCompilePic());
5406 // We prefer the always-available RIP-relative address for the x86-64 boot image.
5407 return HLoadString::LoadKind::kBootImageLinkTimePcRelative;
5408 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5409 DCHECK(GetCompilerOptions().GetCompilePic());
5410 break;
5411 case HLoadString::LoadKind::kBootImageAddress:
5412 break;
5413 case HLoadString::LoadKind::kDexCacheAddress:
5414 DCHECK(Runtime::Current()->UseJitCompilation());
5415 break;
5416 case HLoadString::LoadKind::kDexCachePcRelative:
5417 DCHECK(!Runtime::Current()->UseJitCompilation());
5418 break;
5419 case HLoadString::LoadKind::kDexCacheViaMethod:
5420 break;
5421 }
5422 return desired_string_load_kind;
5423 }
5424
5425 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
5426 LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
5427 ? LocationSummary::kCallOnSlowPath
5428 : LocationSummary::kNoCall;
5429 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
5430 if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
5431 locations->SetInAt(0, Location::RequiresRegister());
5432 }
5433 locations->SetOut(Location::RequiresRegister());
5434 }
5435
5436 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
5437 LocationSummary* locations = load->GetLocations();
5438 Location out_loc = locations->Out();
5439 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5440
5441 switch (load->GetLoadKind()) {
5442 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
5443 DCHECK(!kEmitCompilerReadBarrier);
5444 __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
5445 codegen_->RecordStringPatch(load);
5446 return; // No dex cache slow path.
5447 }
5448 case HLoadString::LoadKind::kBootImageAddress: {
5449 DCHECK(!kEmitCompilerReadBarrier);
5450 DCHECK_NE(load->GetAddress(), 0u);
5451 uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
5452 __ movl(out, Immediate(address)); // Zero-extended.
5453 codegen_->RecordSimplePatch();
5454 return; // No dex cache slow path.
5455 }
5456 case HLoadString::LoadKind::kDexCacheAddress: {
5457 DCHECK_NE(load->GetAddress(), 0u);
5458 if (IsUint<32>(load->GetAddress())) {
5459 Address address = Address::Absolute(load->GetAddress(), /* no_rip */ true);
5460 GenerateGcRootFieldLoad(load, out_loc, address);
5461 } else {
5462 // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address).
5463 __ movq(out, Immediate(load->GetAddress()));
5464 GenerateGcRootFieldLoad(load, out_loc, Address(out, 0));
5465 }
5466 break;
5467 }
5468 case HLoadString::LoadKind::kDexCachePcRelative: {
5469 uint32_t offset = load->GetDexCacheElementOffset();
5470 Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset);
5471 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5472 /* no_rip */ false);
5473 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label);
5474 break;
5475 }
5476 case HLoadString::LoadKind::kDexCacheViaMethod: {
5477 CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
5478
5479 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5480 GenerateGcRootFieldLoad(
5481 load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
5482 // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
5483 __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
5484 // /* GcRoot<mirror::String> */ out = out[string_index]
5485 GenerateGcRootFieldLoad(
5486 load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
5487 break;
5488 }
5489 default:
5490 LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
5491 UNREACHABLE();
5492 }
5493
5494 if (!load->IsInDexCache()) {
5495 SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
5496 codegen_->AddSlowPath(slow_path);
5497 __ testl(out, out);
5498 __ j(kEqual, slow_path->GetEntryLabel());
5499 __ Bind(slow_path->GetExitLabel());
5500 }
5501 }
5502
5503 static Address GetExceptionTlsAddress() {
5504 return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(),
5505 /* no_rip */ true);
5506 }
5507
5508 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
5509 LocationSummary* locations =
5510 new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
5511 locations->SetOut(Location::RequiresRegister());
5512 }
5513
5514 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
5515 __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
5516 }
5517
5518 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
5519 new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall);
5520 }
5521
5522 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
5523 __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
5524 }
5525
5526 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
5527 LocationSummary* locations =
5528 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
5529 InvokeRuntimeCallingConvention calling_convention;
5530 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5531 }
5532
5533 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
5534 codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pDeliverException),
5535 instruction,
5536 instruction->GetDexPc(),
5537 nullptr);
5538 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
5539 }
5540
5541 static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
5542 return kEmitCompilerReadBarrier &&
5543 (kUseBakerReadBarrier ||
5544 type_check_kind == TypeCheckKind::kAbstractClassCheck ||
5545 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
5546 type_check_kind == TypeCheckKind::kArrayObjectCheck);
5547 }
5548
5549 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
5550 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
5551 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5552 switch (type_check_kind) {
5553 case TypeCheckKind::kExactCheck:
5554 case TypeCheckKind::kAbstractClassCheck:
5555 case TypeCheckKind::kClassHierarchyCheck:
5556 case TypeCheckKind::kArrayObjectCheck:
5557 call_kind =
5558 kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
5559 break;
5560 case TypeCheckKind::kArrayCheck:
5561 case TypeCheckKind::kUnresolvedCheck:
5562 case TypeCheckKind::kInterfaceCheck:
5563 call_kind = LocationSummary::kCallOnSlowPath;
5564 break;
5565 }
5566
5567 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
5568 locations->SetInAt(0, Location::RequiresRegister());
5569 locations->SetInAt(1, Location::Any());
5570 // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
5571 locations->SetOut(Location::RequiresRegister());
5572 // When read barriers are enabled, we need a temporary register for
5573 // some cases.
5574 if (TypeCheckNeedsATemporary(type_check_kind)) {
5575 locations->AddTemp(Location::RequiresRegister());
5576 }
5577 }
5578
5579 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
5580 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5581 LocationSummary* locations = instruction->GetLocations();
5582 Location obj_loc = locations->InAt(0);
5583 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5584 Location cls = locations->InAt(1);
5585 Location out_loc = locations->Out();
5586 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5587 Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
5588 locations->GetTemp(0) :
5589 Location::NoLocation();
5590 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5591 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5592 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5593 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
5594 SlowPathCode* slow_path = nullptr;
5595 NearLabel done, zero;
5596
5597 // Return 0 if `obj` is null.
5598 // Avoid null check if we know obj is not null.
5599 if (instruction->MustDoNullCheck()) {
5600 __ testl(obj, obj);
5601 __ j(kEqual, &zero);
5602 }
5603
5604 // /* HeapReference<Class> */ out = obj->klass_
5605 GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
5606
5607 switch (type_check_kind) {
5608 case TypeCheckKind::kExactCheck: {
5609 if (cls.IsRegister()) {
5610 __ cmpl(out, cls.AsRegister<CpuRegister>());
5611 } else {
5612 DCHECK(cls.IsStackSlot()) << cls;
5613 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5614 }
5615 if (zero.IsLinked()) {
5616 // Classes must be equal for the instanceof to succeed.
5617 __ j(kNotEqual, &zero);
5618 __ movl(out, Immediate(1));
5619 __ jmp(&done);
5620 } else {
5621 __ setcc(kEqual, out);
5622 // setcc only sets the low byte.
5623 __ andl(out, Immediate(1));
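        // Illustratively (with placeholder registers), this branch-free path
        // amounts to: cmpl cls, out ; sete out ; andl $1, out, leaving 1 in
        // `out` on equality and 0 otherwise.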
5624 }
5625 break;
5626 }
5627
5628 case TypeCheckKind::kAbstractClassCheck: {
5629 // If the class is abstract, we eagerly fetch the super class of the
5630 // object to avoid doing a comparison we know will fail.
5631 NearLabel loop, success;
5632 __ Bind(&loop);
5633 // /* HeapReference<Class> */ out = out->super_class_
5634 GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
5635 __ testl(out, out);
5636 // If `out` is null, we use it for the result, and jump to `done`.
5637 __ j(kEqual, &done);
5638 if (cls.IsRegister()) {
5639 __ cmpl(out, cls.AsRegister<CpuRegister>());
5640 } else {
5641 DCHECK(cls.IsStackSlot()) << cls;
5642 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5643 }
5644 __ j(kNotEqual, &loop);
5645 __ movl(out, Immediate(1));
5646 if (zero.IsLinked()) {
5647 __ jmp(&done);
5648 }
5649 break;
5650 }
5651
5652 case TypeCheckKind::kClassHierarchyCheck: {
5653 // Walk over the class hierarchy to find a match.
5654 NearLabel loop, success;
5655 __ Bind(&loop);
5656 if (cls.IsRegister()) {
5657 __ cmpl(out, cls.AsRegister<CpuRegister>());
5658 } else {
5659 DCHECK(cls.IsStackSlot()) << cls;
5660 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5661 }
5662 __ j(kEqual, &success);
5663 // /* HeapReference<Class> */ out = out->super_class_
5664 GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
5665 __ testl(out, out);
5666 __ j(kNotEqual, &loop);
5667 // If `out` is null, we use it for the result, and jump to `done`.
5668 __ jmp(&done);
5669 __ Bind(&success);
5670 __ movl(out, Immediate(1));
5671 if (zero.IsLinked()) {
5672 __ jmp(&done);
5673 }
5674 break;
5675 }
5676
5677 case TypeCheckKind::kArrayObjectCheck: {
5678 // Do an exact check.
5679 NearLabel exact_check;
5680 if (cls.IsRegister()) {
5681 __ cmpl(out, cls.AsRegister<CpuRegister>());
5682 } else {
5683 DCHECK(cls.IsStackSlot()) << cls;
5684 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5685 }
5686 __ j(kEqual, &exact_check);
5687 // Otherwise, we need to check that the object's class is a non-primitive array.
5688 // /* HeapReference<Class> */ out = out->component_type_
5689 GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
5690 __ testl(out, out);
5691 // If `out` is null, we use it for the result, and jump to `done`.
5692 __ j(kEqual, &done);
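      // A reference (non-primitive) component type has its primitive type
      // field equal to Primitive::kPrimNot, so primitive arrays such as int[]
      // take the branch to `zero` below.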
5693 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
5694 __ j(kNotEqual, &zero);
5695 __ Bind(&exact_check);
5696 __ movl(out, Immediate(1));
5697 __ jmp(&done);
5698 break;
5699 }
5700
5701 case TypeCheckKind::kArrayCheck: {
5702 if (cls.IsRegister()) {
5703 __ cmpl(out, cls.AsRegister<CpuRegister>());
5704 } else {
5705 DCHECK(cls.IsStackSlot()) << cls;
5706 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5707 }
5708 DCHECK(locations->OnlyCallsOnSlowPath());
5709 slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
5710 /* is_fatal */ false);
5711 codegen_->AddSlowPath(slow_path);
5712 __ j(kNotEqual, slow_path->GetEntryLabel());
5713 __ movl(out, Immediate(1));
5714 if (zero.IsLinked()) {
5715 __ jmp(&done);
5716 }
5717 break;
5718 }
5719
5720 case TypeCheckKind::kUnresolvedCheck:
5721 case TypeCheckKind::kInterfaceCheck: {
5722 // Note that we indeed only call on slow path, but we always go
5723 // into the slow path for the unresolved and interface check
5724 // cases.
5725 //
5726 // We cannot directly call the InstanceofNonTrivial runtime
5727 // entry point without resorting to a type checking slow path
5728 // here (i.e. by calling InvokeRuntime directly), as it would
5729       // require assigning fixed registers for the inputs of this
5730 // HInstanceOf instruction (following the runtime calling
5731 // convention), which might be cluttered by the potential first
5732 // read barrier emission at the beginning of this method.
5733 //
5734 // TODO: Introduce a new runtime entry point taking the object
5735 // to test (instead of its class) as argument, and let it deal
5736 // with the read barrier issues. This will let us refactor this
5737 // case of the `switch` code as it was previously (with a direct
5738 // call to the runtime not using a type checking slow path).
5739 // This should also be beneficial for the other cases above.
5740 DCHECK(locations->OnlyCallsOnSlowPath());
5741 slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
5742 /* is_fatal */ false);
5743 codegen_->AddSlowPath(slow_path);
5744 __ jmp(slow_path->GetEntryLabel());
5745 if (zero.IsLinked()) {
5746 __ jmp(&done);
5747 }
5748 break;
5749 }
5750 }
5751
5752 if (zero.IsLinked()) {
5753 __ Bind(&zero);
5754 __ xorl(out, out);
5755 }
5756
5757 if (done.IsLinked()) {
5758 __ Bind(&done);
5759 }
5760
5761 if (slow_path != nullptr) {
5762 __ Bind(slow_path->GetExitLabel());
5763 }
5764 }
5765
5766 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
5767 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
5768 bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
5769 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5770 switch (type_check_kind) {
5771 case TypeCheckKind::kExactCheck:
5772 case TypeCheckKind::kAbstractClassCheck:
5773 case TypeCheckKind::kClassHierarchyCheck:
5774 case TypeCheckKind::kArrayObjectCheck:
5775 call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
5776 LocationSummary::kCallOnSlowPath :
5777 LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path.
5778 break;
5779 case TypeCheckKind::kArrayCheck:
5780 case TypeCheckKind::kUnresolvedCheck:
5781 case TypeCheckKind::kInterfaceCheck:
5782 call_kind = LocationSummary::kCallOnSlowPath;
5783 break;
5784 }
5785 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
5786 locations->SetInAt(0, Location::RequiresRegister());
5787 locations->SetInAt(1, Location::Any());
5788 // Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
5789 locations->AddTemp(Location::RequiresRegister());
5790 // When read barriers are enabled, we need an additional temporary
5791 // register for some cases.
5792 if (TypeCheckNeedsATemporary(type_check_kind)) {
5793 locations->AddTemp(Location::RequiresRegister());
5794 }
5795 }
5796
5797 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
5798 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5799 LocationSummary* locations = instruction->GetLocations();
5800 Location obj_loc = locations->InAt(0);
5801 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5802 Location cls = locations->InAt(1);
5803 Location temp_loc = locations->GetTemp(0);
5804 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
5805 Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
5806 locations->GetTemp(1) :
5807 Location::NoLocation();
5808 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5809 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5810 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5811 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
5812
5813 bool is_type_check_slow_path_fatal =
5814 (type_check_kind == TypeCheckKind::kExactCheck ||
5815 type_check_kind == TypeCheckKind::kAbstractClassCheck ||
5816 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
5817 type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
5818 !instruction->CanThrowIntoCatchBlock();
5819 SlowPathCode* type_check_slow_path =
5820 new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
5821 is_type_check_slow_path_fatal);
5822 codegen_->AddSlowPath(type_check_slow_path);
5823
5824 switch (type_check_kind) {
5825 case TypeCheckKind::kExactCheck:
5826 case TypeCheckKind::kArrayCheck: {
5827 NearLabel done;
5828 // Avoid null check if we know obj is not null.
5829 if (instruction->MustDoNullCheck()) {
5830 __ testl(obj, obj);
5831 __ j(kEqual, &done);
5832 }
5833
5834 // /* HeapReference<Class> */ temp = obj->klass_
5835 GenerateReferenceLoadTwoRegisters(
5836 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5837
5838 if (cls.IsRegister()) {
5839 __ cmpl(temp, cls.AsRegister<CpuRegister>());
5840 } else {
5841 DCHECK(cls.IsStackSlot()) << cls;
5842 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5843 }
5844 // Jump to slow path for throwing the exception or doing a
5845 // more involved array check.
5846 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
5847 __ Bind(&done);
5848 break;
5849 }
5850
5851 case TypeCheckKind::kAbstractClassCheck: {
5852 NearLabel done;
5853 // Avoid null check if we know obj is not null.
5854 if (instruction->MustDoNullCheck()) {
5855 __ testl(obj, obj);
5856 __ j(kEqual, &done);
5857 }
5858
5859 // /* HeapReference<Class> */ temp = obj->klass_
5860 GenerateReferenceLoadTwoRegisters(
5861 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5862
5863 // If the class is abstract, we eagerly fetch the super class of the
5864 // object to avoid doing a comparison we know will fail.
5865 NearLabel loop, compare_classes;
5866 __ Bind(&loop);
5867 // /* HeapReference<Class> */ temp = temp->super_class_
5868 GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
5869
5870 // If the class reference currently in `temp` is not null, jump
5871 // to the `compare_classes` label to compare it with the checked
5872 // class.
5873 __ testl(temp, temp);
5874 __ j(kNotEqual, &compare_classes);
5875 // Otherwise, jump to the slow path to throw the exception.
5876 //
5877 // But before, move back the object's class into `temp` before
5878 // going into the slow path, as it has been overwritten in the
5879 // meantime.
5880 // /* HeapReference<Class> */ temp = obj->klass_
5881 GenerateReferenceLoadTwoRegisters(
5882 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5883 __ jmp(type_check_slow_path->GetEntryLabel());
5884
5885 __ Bind(&compare_classes);
5886 if (cls.IsRegister()) {
5887 __ cmpl(temp, cls.AsRegister<CpuRegister>());
5888 } else {
5889 DCHECK(cls.IsStackSlot()) << cls;
5890 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5891 }
5892 __ j(kNotEqual, &loop);
5893 __ Bind(&done);
5894 break;
5895 }
5896
5897 case TypeCheckKind::kClassHierarchyCheck: {
5898 NearLabel done;
5899 // Avoid null check if we know obj is not null.
5900 if (instruction->MustDoNullCheck()) {
5901 __ testl(obj, obj);
5902 __ j(kEqual, &done);
5903 }
5904
5905 // /* HeapReference<Class> */ temp = obj->klass_
5906 GenerateReferenceLoadTwoRegisters(
5907 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5908
5909 // Walk over the class hierarchy to find a match.
5910 NearLabel loop;
5911 __ Bind(&loop);
5912 if (cls.IsRegister()) {
5913 __ cmpl(temp, cls.AsRegister<CpuRegister>());
5914 } else {
5915 DCHECK(cls.IsStackSlot()) << cls;
5916 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5917 }
5918 __ j(kEqual, &done);
5919
5920 // /* HeapReference<Class> */ temp = temp->super_class_
5921 GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
5922
5923 // If the class reference currently in `temp` is not null, jump
5924       // back to the beginning of the loop.
5925 __ testl(temp, temp);
5926 __ j(kNotEqual, &loop);
5927 // Otherwise, jump to the slow path to throw the exception.
5928 //
5929 // But before, move back the object's class into `temp` before
5930 // going into the slow path, as it has been overwritten in the
5931 // meantime.
5932 // /* HeapReference<Class> */ temp = obj->klass_
5933 GenerateReferenceLoadTwoRegisters(
5934 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5935 __ jmp(type_check_slow_path->GetEntryLabel());
5936 __ Bind(&done);
5937 break;
5938 }
5939
5940 case TypeCheckKind::kArrayObjectCheck: {
5941 // We cannot use a NearLabel here, as its range might be too
5942 // short in some cases when read barriers are enabled. This has
5943 // been observed for instance when the code emitted for this
5944 // case uses high x86-64 registers (R8-R15).
5945 Label done;
5946 // Avoid null check if we know obj is not null.
5947 if (instruction->MustDoNullCheck()) {
5948 __ testl(obj, obj);
5949 __ j(kEqual, &done);
5950 }
5951
5952 // /* HeapReference<Class> */ temp = obj->klass_
5953 GenerateReferenceLoadTwoRegisters(
5954 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5955
5956 // Do an exact check.
5957 NearLabel check_non_primitive_component_type;
5958 if (cls.IsRegister()) {
5959 __ cmpl(temp, cls.AsRegister<CpuRegister>());
5960 } else {
5961 DCHECK(cls.IsStackSlot()) << cls;
5962 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5963 }
5964 __ j(kEqual, &done);
5965
5966 // Otherwise, we need to check that the object's class is a non-primitive array.
5967 // /* HeapReference<Class> */ temp = temp->component_type_
5968 GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
5969
5970 // If the component type is not null (i.e. the object is indeed
5971 // an array), jump to label `check_non_primitive_component_type`
5972 // to further check that this component type is not a primitive
5973 // type.
5974 __ testl(temp, temp);
5975 __ j(kNotEqual, &check_non_primitive_component_type);
5976 // Otherwise, jump to the slow path to throw the exception.
5977 //
5978 // But before, move back the object's class into `temp` before
5979 // going into the slow path, as it has been overwritten in the
5980 // meantime.
5981 // /* HeapReference<Class> */ temp = obj->klass_
5982 GenerateReferenceLoadTwoRegisters(
5983 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5984 __ jmp(type_check_slow_path->GetEntryLabel());
5985
5986 __ Bind(&check_non_primitive_component_type);
5987 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
5988 __ j(kEqual, &done);
5989 // Same comment as above regarding `temp` and the slow path.
5990 // /* HeapReference<Class> */ temp = obj->klass_
5991 GenerateReferenceLoadTwoRegisters(
5992 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5993 __ jmp(type_check_slow_path->GetEntryLabel());
5994 __ Bind(&done);
5995 break;
5996 }
5997
5998 case TypeCheckKind::kUnresolvedCheck:
5999 case TypeCheckKind::kInterfaceCheck:
6000 NearLabel done;
6001 // Avoid null check if we know obj is not null.
6002 if (instruction->MustDoNullCheck()) {
6003 __ testl(obj, obj);
6004 __ j(kEqual, &done);
6005 }
6006
6007 // /* HeapReference<Class> */ temp = obj->klass_
6008 GenerateReferenceLoadTwoRegisters(
6009 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
6010
6011 // We always go into the type check slow path for the unresolved
6012 // and interface check cases.
6013 //
6014 // We cannot directly call the CheckCast runtime entry point
6015 // without resorting to a type checking slow path here (i.e. by
6016       // calling InvokeRuntime directly), as it would require assigning
6017       // fixed registers for the inputs of this HCheckCast
6018       // instruction (following the runtime calling convention), which
6019 // might be cluttered by the potential first read barrier
6020 // emission at the beginning of this method.
6021 //
6022 // TODO: Introduce a new runtime entry point taking the object
6023 // to test (instead of its class) as argument, and let it deal
6024 // with the read barrier issues. This will let us refactor this
6025 // case of the `switch` code as it was previously (with a direct
6026 // call to the runtime not using a type checking slow path).
6027 // This should also be beneficial for the other cases above.
6028 __ jmp(type_check_slow_path->GetEntryLabel());
6029 __ Bind(&done);
6030 break;
6031 }
6032
6033 __ Bind(type_check_slow_path->GetExitLabel());
6034 }
6035
6036 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6037 LocationSummary* locations =
6038 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
6039 InvokeRuntimeCallingConvention calling_convention;
6040 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6041 }
6042
6043 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6044 codegen_->InvokeRuntime(instruction->IsEnter() ? QUICK_ENTRY_POINT(pLockObject)
6045 : QUICK_ENTRY_POINT(pUnlockObject),
6046 instruction,
6047 instruction->GetDexPc(),
6048 nullptr);
6049 if (instruction->IsEnter()) {
6050 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
6051 } else {
6052 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
6053 }
6054 }
6055
6056 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
6057 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
6058 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
6059
6060 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6061 LocationSummary* locations =
6062 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
6063 DCHECK(instruction->GetResultType() == Primitive::kPrimInt
6064 || instruction->GetResultType() == Primitive::kPrimLong);
6065 locations->SetInAt(0, Location::RequiresRegister());
6066 locations->SetInAt(1, Location::Any());
6067 locations->SetOut(Location::SameAsFirstInput());
6068 }
6069
6070 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
6071 HandleBitwiseOperation(instruction);
6072 }
6073
6074 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
6075 HandleBitwiseOperation(instruction);
6076 }
6077
6078 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
6079 HandleBitwiseOperation(instruction);
6080 }
6081
6082 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6083 LocationSummary* locations = instruction->GetLocations();
6084 Location first = locations->InAt(0);
6085 Location second = locations->InAt(1);
6086 DCHECK(first.Equals(locations->Out()));
6087
6088 if (instruction->GetResultType() == Primitive::kPrimInt) {
6089 if (second.IsRegister()) {
6090 if (instruction->IsAnd()) {
6091 __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6092 } else if (instruction->IsOr()) {
6093 __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6094 } else {
6095 DCHECK(instruction->IsXor());
6096 __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6097 }
6098 } else if (second.IsConstant()) {
6099 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
6100 if (instruction->IsAnd()) {
6101 __ andl(first.AsRegister<CpuRegister>(), imm);
6102 } else if (instruction->IsOr()) {
6103 __ orl(first.AsRegister<CpuRegister>(), imm);
6104 } else {
6105 DCHECK(instruction->IsXor());
6106 __ xorl(first.AsRegister<CpuRegister>(), imm);
6107 }
6108 } else {
6109 Address address(CpuRegister(RSP), second.GetStackIndex());
6110 if (instruction->IsAnd()) {
6111 __ andl(first.AsRegister<CpuRegister>(), address);
6112 } else if (instruction->IsOr()) {
6113 __ orl(first.AsRegister<CpuRegister>(), address);
6114 } else {
6115 DCHECK(instruction->IsXor());
6116 __ xorl(first.AsRegister<CpuRegister>(), address);
6117 }
6118 }
6119 } else {
6120 DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
6121 CpuRegister first_reg = first.AsRegister<CpuRegister>();
6122 bool second_is_constant = false;
6123 int64_t value = 0;
6124 if (second.IsConstant()) {
6125 second_is_constant = true;
6126 value = second.GetConstant()->AsLongConstant()->GetValue();
6127 }
6128 bool is_int32_value = IsInt<32>(value);
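    // Constants that fit in 32 bits (e.g. 0xFF) are encoded as immediates
    // below; wider constants (e.g. 0x123456789) go through a RIP-relative load
    // from the constant area via LiteralInt64Address, since x86-64 logical
    // instructions do not accept 64-bit immediates.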
6129
6130 if (instruction->IsAnd()) {
6131 if (second_is_constant) {
6132 if (is_int32_value) {
6133 __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
6134 } else {
6135 __ andq(first_reg, codegen_->LiteralInt64Address(value));
6136 }
6137 } else if (second.IsDoubleStackSlot()) {
6138 __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6139 } else {
6140 __ andq(first_reg, second.AsRegister<CpuRegister>());
6141 }
6142 } else if (instruction->IsOr()) {
6143 if (second_is_constant) {
6144 if (is_int32_value) {
6145 __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
6146 } else {
6147 __ orq(first_reg, codegen_->LiteralInt64Address(value));
6148 }
6149 } else if (second.IsDoubleStackSlot()) {
6150 __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6151 } else {
6152 __ orq(first_reg, second.AsRegister<CpuRegister>());
6153 }
6154 } else {
6155 DCHECK(instruction->IsXor());
6156 if (second_is_constant) {
6157 if (is_int32_value) {
6158 __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
6159 } else {
6160 __ xorq(first_reg, codegen_->LiteralInt64Address(value));
6161 }
6162 } else if (second.IsDoubleStackSlot()) {
6163 __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6164 } else {
6165 __ xorq(first_reg, second.AsRegister<CpuRegister>());
6166 }
6167 }
6168 }
6169 }
6170
6171 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
6172 Location out,
6173 uint32_t offset,
6174 Location maybe_temp) {
6175 CpuRegister out_reg = out.AsRegister<CpuRegister>();
6176 if (kEmitCompilerReadBarrier) {
6177 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
6178 if (kUseBakerReadBarrier) {
6179 // Load with fast path based Baker's read barrier.
6180 // /* HeapReference<Object> */ out = *(out + offset)
6181 codegen_->GenerateFieldLoadWithBakerReadBarrier(
6182 instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
6183 } else {
6184 // Load with slow path based read barrier.
6185 // Save the value of `out` into `maybe_temp` before overwriting it
6186 // in the following move operation, as we will need it for the
6187 // read barrier below.
6188 __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
6189 // /* HeapReference<Object> */ out = *(out + offset)
6190 __ movl(out_reg, Address(out_reg, offset));
6191 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
6192 }
6193 } else {
6194 // Plain load with no read barrier.
6195 // /* HeapReference<Object> */ out = *(out + offset)
6196 __ movl(out_reg, Address(out_reg, offset));
6197 __ MaybeUnpoisonHeapReference(out_reg);
6198 }
6199 }
6200
6201 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
6202 Location out,
6203 Location obj,
6204 uint32_t offset,
6205 Location maybe_temp) {
6206 CpuRegister out_reg = out.AsRegister<CpuRegister>();
6207 CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
6208 if (kEmitCompilerReadBarrier) {
6209 if (kUseBakerReadBarrier) {
6210 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
6211 // Load with fast path based Baker's read barrier.
6212 // /* HeapReference<Object> */ out = *(obj + offset)
6213 codegen_->GenerateFieldLoadWithBakerReadBarrier(
6214 instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
6215 } else {
6216 // Load with slow path based read barrier.
6217 // /* HeapReference<Object> */ out = *(obj + offset)
6218 __ movl(out_reg, Address(obj_reg, offset));
6219 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
6220 }
6221 } else {
6222 // Plain load with no read barrier.
6223 // /* HeapReference<Object> */ out = *(obj + offset)
6224 __ movl(out_reg, Address(obj_reg, offset));
6225 __ MaybeUnpoisonHeapReference(out_reg);
6226 }
6227 }
6228
6229 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instruction,
6230 Location root,
6231 const Address& address,
6232 Label* fixup_label) {
6233 CpuRegister root_reg = root.AsRegister<CpuRegister>();
6234 if (kEmitCompilerReadBarrier) {
6235 if (kUseBakerReadBarrier) {
6236 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
6237       // Baker's read barriers are used:
6238 //
6239 // root = *address;
6240 // if (Thread::Current()->GetIsGcMarking()) {
6241 // root = ReadBarrier::Mark(root)
6242 // }
6243
6244 // /* GcRoot<mirror::Object> */ root = *address
6245 __ movl(root_reg, address);
6246 if (fixup_label != nullptr) {
6247 __ Bind(fixup_label);
6248 }
6249 static_assert(
6250 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
6251 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
6252 "have different sizes.");
6253 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
6254 "art::mirror::CompressedReference<mirror::Object> and int32_t "
6255 "have different sizes.");
6256
6257 // Slow path used to mark the GC root `root`.
6258 SlowPathCode* slow_path =
6259 new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root, root);
6260 codegen_->AddSlowPath(slow_path);
6261
6262 __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64WordSize>().Int32Value(),
6263 /* no_rip */ true),
6264 Immediate(0));
6265 __ j(kNotEqual, slow_path->GetEntryLabel());
6266 __ Bind(slow_path->GetExitLabel());
6267 } else {
6268 // GC root loaded through a slow path for read barriers other
6269 // than Baker's.
6270 // /* GcRoot<mirror::Object>* */ root = address
6271 __ leaq(root_reg, address);
6272 if (fixup_label != nullptr) {
6273 __ Bind(fixup_label);
6274 }
6275 // /* mirror::Object* */ root = root->Read()
6276 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
6277 }
6278 } else {
6279 // Plain GC root load with no read barrier.
6280 // /* GcRoot<mirror::Object> */ root = *address
6281 __ movl(root_reg, address);
6282 if (fixup_label != nullptr) {
6283 __ Bind(fixup_label);
6284 }
6285 // Note that GC roots are not affected by heap poisoning, thus we
6286 // do not have to unpoison `root_reg` here.
6287 }
6288 }
6289
6290 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6291 Location ref,
6292 CpuRegister obj,
6293 uint32_t offset,
6294 Location temp,
6295 bool needs_null_check) {
6296 DCHECK(kEmitCompilerReadBarrier);
6297 DCHECK(kUseBakerReadBarrier);
6298
6299 // /* HeapReference<Object> */ ref = *(obj + offset)
6300 Address src(obj, offset);
6301 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
6302 }
6303
6304 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
6305 Location ref,
6306 CpuRegister obj,
6307 uint32_t data_offset,
6308 Location index,
6309 Location temp,
6310 bool needs_null_check) {
6311 DCHECK(kEmitCompilerReadBarrier);
6312 DCHECK(kUseBakerReadBarrier);
6313
6314 // /* HeapReference<Object> */ ref =
6315 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
6316 Address src = index.IsConstant() ?
6317 Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
6318 Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset);
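  // For example (illustrative values): with data_offset == 12 and a constant
  // index of 3, the address is obj + (3 << TIMES_4) + 12 == obj + 24, i.e. the
  // fourth 4-byte heap reference in the array.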
6319 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
6320 }
6321
6322 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
6323 Location ref,
6324 CpuRegister obj,
6325 const Address& src,
6326 Location temp,
6327 bool needs_null_check) {
6328 DCHECK(kEmitCompilerReadBarrier);
6329 DCHECK(kUseBakerReadBarrier);
6330
6331 // In slow path based read barriers, the read barrier call is
6332 // inserted after the original load. However, in fast path based
6333 // Baker's read barriers, we need to perform the load of
6334 // mirror::Object::monitor_ *before* the original reference load.
6335 // This load-load ordering is required by the read barrier.
6336 // The fast path/slow path (for Baker's algorithm) should look like:
6337 //
6338 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
6339 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
6340 // HeapReference<Object> ref = *src; // Original reference load.
6341 // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
6342 // if (is_gray) {
6343 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
6344 // }
6345 //
6346 // Note: the original implementation in ReadBarrier::Barrier is
6347 // slightly more complex as:
6348 // - it implements the load-load fence using a data dependency on
6349 // the high-bits of rb_state, which are expected to be all zeroes
6350 // (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
6351 // here, which is a no-op thanks to the x86-64 memory model);
6352 // - it performs additional checks that we do not do here for
6353 // performance reasons.
6354
6355 CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
6356 CpuRegister temp_reg = temp.AsRegister<CpuRegister>();
6357 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
6358
6359 // /* int32_t */ monitor = obj->monitor_
6360 __ movl(temp_reg, Address(obj, monitor_offset));
6361 if (needs_null_check) {
6362 MaybeRecordImplicitNullCheck(instruction);
6363 }
6364 // /* LockWord */ lock_word = LockWord(monitor)
6365 static_assert(sizeof(LockWord) == sizeof(int32_t),
6366 "art::LockWord and int32_t have different sizes.");
6367 // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
6368 __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
6369 __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
6370 static_assert(
6371 LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
6372 "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
6373
6374 // Load fence to prevent load-load reordering.
6375 // Note that this is a no-op, thanks to the x86-64 memory model.
6376 GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6377
6378 // The actual reference load.
6379 // /* HeapReference<Object> */ ref = *src
6380 __ movl(ref_reg, src);
6381
6382 // Object* ref = ref_addr->AsMirrorPtr()
6383 __ MaybeUnpoisonHeapReference(ref_reg);
6384
6385 // Slow path used to mark the object `ref` when it is gray.
6386 SlowPathCode* slow_path =
6387 new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref, ref);
6388 AddSlowPath(slow_path);
6389
6390 // if (rb_state == ReadBarrier::gray_ptr_)
6391 // ref = ReadBarrier::Mark(ref);
6392 __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
6393 __ j(kEqual, slow_path->GetEntryLabel());
6394 __ Bind(slow_path->GetExitLabel());
6395 }
6396
6397 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
6398 Location out,
6399 Location ref,
6400 Location obj,
6401 uint32_t offset,
6402 Location index) {
6403 DCHECK(kEmitCompilerReadBarrier);
6404
6405 // Insert a slow path based read barrier *after* the reference load.
6406 //
6407 // If heap poisoning is enabled, the unpoisoning of the loaded
6408 // reference will be carried out by the runtime within the slow
6409 // path.
6410 //
6411 // Note that `ref` currently does not get unpoisoned (when heap
6412 // poisoning is enabled), which is alright as the `ref` argument is
6413 // not used by the artReadBarrierSlow entry point.
6414 //
6415 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
6416 SlowPathCode* slow_path = new (GetGraph()->GetArena())
6417 ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
6418 AddSlowPath(slow_path);
6419
6420 __ jmp(slow_path->GetEntryLabel());
6421 __ Bind(slow_path->GetExitLabel());
6422 }
6423
6424 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
6425 Location out,
6426 Location ref,
6427 Location obj,
6428 uint32_t offset,
6429 Location index) {
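  // In summary: Baker read barriers are handled entirely on the fast path and
  // need nothing here; other read barrier configurations fall back to the
  // slow-path call below; without read barriers, only heap-reference
  // unpoisoning (if enabled) remains to be done.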
6430 if (kEmitCompilerReadBarrier) {
6431 // Baker's read barriers shall be handled by the fast path
6432 // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
6433 DCHECK(!kUseBakerReadBarrier);
6434 // If heap poisoning is enabled, unpoisoning will be taken care of
6435 // by the runtime within the slow path.
6436 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
6437 } else if (kPoisonHeapReferences) {
6438 __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
6439 }
6440 }
6441
6442 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
6443 Location out,
6444 Location root) {
6445 DCHECK(kEmitCompilerReadBarrier);
6446
6447 // Insert a slow path based read barrier *after* the GC root load.
6448 //
6449 // Note that GC roots are not affected by heap poisoning, so we do
6450 // not need to do anything special for this here.
6451 SlowPathCode* slow_path =
6452 new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
6453 AddSlowPath(slow_path);
6454
6455 __ jmp(slow_path->GetEntryLabel());
6456 __ Bind(slow_path->GetExitLabel());
6457 }
6458
6459 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6460 // Nothing to do, this should be removed during prepare for register allocator.
6461 LOG(FATAL) << "Unreachable";
6462 }
6463
6464 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6465 // Nothing to do, this should be removed during prepare for register allocator.
6466 LOG(FATAL) << "Unreachable";
6467 }
6468
6469 // Simple implementation of packed switch - generate cascaded compare/jumps.
6470 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6471 LocationSummary* locations =
6472 new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
6473 locations->SetInAt(0, Location::RequiresRegister());
6474 locations->AddTemp(Location::RequiresRegister());
6475 locations->AddTemp(Location::RequiresRegister());
6476 }
6477
6478 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6479 int32_t lower_bound = switch_instr->GetStartValue();
6480 uint32_t num_entries = switch_instr->GetNumEntries();
6481 LocationSummary* locations = switch_instr->GetLocations();
6482 CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
6483 CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
6484 CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
6485 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
6486
6487 // Should we generate smaller inline compare/jumps?
6488 if (num_entries <= kPackedSwitchJumpTableThreshold) {
6489 // Figure out the correct compare values and jump conditions.
6490 // Handle the first compare/branch as a special case because it might
6491 // jump to the default case.
6492 DCHECK_GT(num_entries, 2u);
6493 Condition first_condition;
6494 uint32_t index;
6495 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
6496 if (lower_bound != 0) {
6497 first_condition = kLess;
6498 __ cmpl(value_reg_in, Immediate(lower_bound));
6499 __ j(first_condition, codegen_->GetLabelOf(default_block));
6500 __ j(kEqual, codegen_->GetLabelOf(successors[0]));
6501
6502 index = 1;
6503 } else {
6504 // Handle all the compare/jumps below.
6505 first_condition = kBelow;
6506 index = 0;
6507 }
6508
6509 // Handle the rest of the compare/jumps.
6510 for (; index + 1 < num_entries; index += 2) {
6511 int32_t compare_to_value = lower_bound + index + 1;
6512 __ cmpl(value_reg_in, Immediate(compare_to_value));
6513 // Jump to successors[index] if value < case_value[index].
6514 __ j(first_condition, codegen_->GetLabelOf(successors[index]));
6515 // Jump to successors[index + 1] if value == case_value[index + 1].
6516 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
6517 }
6518
6519 if (index != num_entries) {
6520 // There are an odd number of entries. Handle the last one.
6521 DCHECK_EQ(index + 1, num_entries);
6522 __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
6523 __ j(kEqual, codegen_->GetLabelOf(successors[index]));
6524 }
6525
6526 // And the default for any other value.
6527 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
6528 __ jmp(codegen_->GetLabelOf(default_block));
6529 }
6530 return;
6531 }
6532
6533 // Remove the bias, if needed.
6534 Register value_reg_out = value_reg_in.AsRegister();
6535 if (lower_bound != 0) {
6536 __ leal(temp_reg, Address(value_reg_in, -lower_bound));
6537 value_reg_out = temp_reg.AsRegister();
6538 }
6539 CpuRegister value_reg(value_reg_out);
6540
6541 // Is the value in range?
6542 __ cmpl(value_reg, Immediate(num_entries - 1));
6543 __ j(kAbove, codegen_->GetLabelOf(default_block));
6544
6545 // We are in the range of the table.
6546 // Load the address of the jump table in the constant area.
6547 __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
6548
6549 // Load the (signed) offset from the jump table.
6550 __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
6551
6552 // Add the offset to the address of the table base.
6553 __ addq(temp_reg, base_reg);
6554
6555 // And jump.
6556 __ jmp(temp_reg);
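  // Taken together, the jump-table dispatch above is roughly (placeholder
  // register names): leal -lower_bound(value), temp ; cmpl $num_entries-1, temp ;
  // ja default ; leaq table(%rip), base ; movsxd (base, temp, 4), temp ;
  // addq base, temp ; jmp *temp.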
6557 }
6558
6559 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
6560 if (value == 0) {
6561 __ xorl(dest, dest);
6562 } else {
6563 __ movl(dest, Immediate(value));
6564 }
6565 }
6566
6567 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
6568 if (value == 0) {
6569 // Clears upper bits too.
6570 __ xorl(dest, dest);
6571 } else if (IsUint<32>(value)) {
6572 // We can use a 32 bit move, as it will zero-extend and is shorter.
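    // For example (illustrative values): 0x80000000 takes this path, as movl
    // writes the low 32 bits and clears the upper 32; a negative value such as
    // -1 fails IsUint<32> and falls through to the 64-bit movq below.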
6573 __ movl(dest, Immediate(static_cast<int32_t>(value)));
6574 } else {
6575 __ movq(dest, Immediate(value));
6576 }
6577 }
6578
6579 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
6580 if (value == 0) {
6581 __ xorps(dest, dest);
6582 } else {
6583 __ movss(dest, LiteralInt32Address(value));
6584 }
6585 }
6586
6587 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
6588 if (value == 0) {
6589 __ xorpd(dest, dest);
6590 } else {
6591 __ movsd(dest, LiteralInt64Address(value));
6592 }
6593 }
6594
6595 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
6596 Load32BitValue(dest, bit_cast<int32_t, float>(value));
6597 }
6598
6599 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
6600 Load64BitValue(dest, bit_cast<int64_t, double>(value));
6601 }
6602
6603 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
6604 if (value == 0) {
6605 __ testl(dest, dest);
6606 } else {
6607 __ cmpl(dest, Immediate(value));
6608 }
6609 }
6610
6611 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
6612 if (IsInt<32>(value)) {
6613 if (value == 0) {
6614 __ testq(dest, dest);
6615 } else {
6616 __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
6617 }
6618 } else {
6619 // Value won't fit in an int.
6620 __ cmpq(dest, LiteralInt64Address(value));
6621 }
6622 }
6623
6624 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
6625 DCHECK(dest.IsDoubleStackSlot());
6626 if (IsInt<32>(value)) {
6627 // Can move directly as an int32 constant.
6628 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
6629 Immediate(static_cast<int32_t>(value)));
6630 } else {
6631 Load64BitValue(CpuRegister(TMP), value);
6632 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
6633 }
6634 }
6635
6636 /**
6637 * Class to handle late fixup of offsets into constant area.
6638 */
6639 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
6640 public:
6641   RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
6642 : codegen_(&codegen), offset_into_constant_area_(offset) {}
6643
6644 protected:
6645   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
6646
6647 CodeGeneratorX86_64* codegen_;
6648
6649 private:
6650   void Process(const MemoryRegion& region, int pos) OVERRIDE {
6651 // Patch the correct offset for the instruction. We use the address of the
6652 // 'next' instruction, which is 'pos' (patch the 4 bytes before).
6653 int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
6654 int32_t relative_position = constant_offset - pos;
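    // Worked example (illustrative numbers): if the constant area starts at
    // code offset 0x100 and this fixup refers to offset 8 into it,
    // constant_offset is 0x108; with pos == 0x40 the stored displacement is
    // 0x108 - 0x40 = 0xC8, i.e. the RIP-relative distance from the next
    // instruction to the literal.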
6655
6656 // Patch in the right value.
6657 region.StoreUnaligned<int32_t>(pos - 4, relative_position);
6658 }
6659
6660 // Location in constant area that the fixup refers to.
6661 size_t offset_into_constant_area_;
6662 };
6663
6664 /**
6665  * Class to handle late fixup of offsets to a jump table that will be created in the
6666 * constant area.
6667 */
6668 class JumpTableRIPFixup : public RIPFixup {
6669 public:
6670   JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
6671 : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
6672
6673   void CreateJumpTable() {
6674 X86_64Assembler* assembler = codegen_->GetAssembler();
6675
6676 // Ensure that the reference to the jump table has the correct offset.
6677 const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
6678 SetOffset(offset_in_constant_table);
6679
6680 // Compute the offset from the start of the function to this jump table.
6681 const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
6682
6683     // Populate the jump table with the correct values.
6684 int32_t num_entries = switch_instr_->GetNumEntries();
6685 HBasicBlock* block = switch_instr_->GetBlock();
6686 const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
6687 // The value that we want is the target offset - the position of the table.
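    // For example (illustrative numbers): if 0x200 bytes of code and 0x10
    // bytes of constants precede the table, current_table_offset is 0x210; a
    // successor block bound at offset 0x80 is stored as 0x80 - 0x210 = -0x190,
    // which the dispatch code adds back to the table address before jumping.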
6688 for (int32_t i = 0; i < num_entries; i++) {
6689 HBasicBlock* b = successors[i];
6690 Label* l = codegen_->GetLabelOf(b);
6691 DCHECK(l->IsBound());
6692 int32_t offset_to_block = l->Position() - current_table_offset;
6693 assembler->AppendInt32(offset_to_block);
6694 }
6695 }
6696
6697 private:
6698 const HPackedSwitch* switch_instr_;
6699 };
6700
6701 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
6702 // Generate the constant area if needed.
6703 X86_64Assembler* assembler = GetAssembler();
6704 if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
6705 // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
6706 assembler->Align(4, 0);
6707 constant_area_start_ = assembler->CodeSize();
6708
6709 // Populate any jump tables.
6710 for (auto jump_table : fixups_to_jump_tables_) {
6711 jump_table->CreateJumpTable();
6712 }
6713
6714 // And now add the constant area to the generated code.
6715 assembler->AddConstantArea();
6716 }
6717
6718 // And finish up.
6719 CodeGenerator::Finalize(allocator);
6720 }
6721
6722 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
6723 AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
6724 return Address::RIP(fixup);
6725 }
6726
6727 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
6728 AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v));
6729 return Address::RIP(fixup);
6730 }
6731
6732 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
6733 AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v));
6734 return Address::RIP(fixup);
6735 }
6736
6737 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
6738 AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v));
6739 return Address::RIP(fixup);
6740 }
6741
6742 // TODO: trg as memory.
6743 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type type) {
6744 if (!trg.IsValid()) {
6745 DCHECK_EQ(type, Primitive::kPrimVoid);
6746 return;
6747 }
6748
6749 DCHECK_NE(type, Primitive::kPrimVoid);
6750
6751 Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
6752 if (trg.Equals(return_loc)) {
6753 return;
6754 }
6755
6756 // Let the parallel move resolver take care of all of this.
6757 HParallelMove parallel_move(GetGraph()->GetArena());
6758 parallel_move.AddMove(return_loc, trg, type, nullptr);
6759   GetMoveResolver()->EmitNativeCode(&parallel_move);
6760 }
6761
6762 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
6763 // Create a fixup to be used to create and address the jump table.
6764 JumpTableRIPFixup* table_fixup =
6765 new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr);
6766
6767 // We have to populate the jump tables.
6768 fixups_to_jump_tables_.push_back(table_fixup);
6769 return Address::RIP(table_fixup);
6770 }
6771
6772 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
6773 const Address& addr_high,
6774 int64_t v,
6775 HInstruction* instruction) {
6776 if (IsInt<32>(v)) {
6777 int32_t v_32 = v;
6778 __ movq(addr_low, Immediate(v_32));
6779 MaybeRecordImplicitNullCheck(instruction);
6780 } else {
6781 // Didn't fit in a register. Do it in pieces.
6782 int32_t low_v = Low32Bits(v);
6783 int32_t high_v = High32Bits(v);
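    // For example (illustrative value): v == 0x123456789ABCDEF0 splits into
    // low_v == 0x9ABCDEF0 and high_v == 0x12345678, written by the two 32-bit
    // stores below; the implicit null check is recorded right after the first
    // store, since that is the access that would fault on a null object.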
6784 __ movl(addr_low, Immediate(low_v));
6785 MaybeRecordImplicitNullCheck(instruction);
6786 __ movl(addr_high, Immediate(high_v));
6787 }
6788 }
6789
6790 #undef __
6791
6792 } // namespace x86_64
6793 } // namespace art
6794