1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/compiler/code-generator.h"
6
7 #include <limits>
8
9 #include "src/compiler/code-generator-impl.h"
10 #include "src/compiler/gap-resolver.h"
11 #include "src/compiler/node-matchers.h"
12 #include "src/compiler/osr.h"
13 #include "src/heap/heap-inl.h"
14 #include "src/optimized-compilation-info.h"
15 #include "src/wasm/wasm-code-manager.h"
16 #include "src/wasm/wasm-objects.h"
17 #include "src/x64/assembler-x64.h"
18 #include "src/x64/macro-assembler-x64.h"
19
20 namespace v8 {
21 namespace internal {
22 namespace compiler {
23
24 #define __ tasm()->
25
26 // Adds X64 specific methods for decoding operands.
27 class X64OperandConverter : public InstructionOperandConverter {
28 public:
  X64OperandConverter(CodeGenerator* gen, Instruction* instr)
30 : InstructionOperandConverter(gen, instr) {}
31
  Immediate InputImmediate(size_t index) {
33 return ToImmediate(instr_->InputAt(index));
34 }
35
  Operand InputOperand(size_t index, int extra = 0) {
37 return ToOperand(instr_->InputAt(index), extra);
38 }
39
  Operand OutputOperand() { return ToOperand(instr_->Output()); }
41
  Immediate ToImmediate(InstructionOperand* operand) {
43 Constant constant = ToConstant(operand);
44 if (constant.type() == Constant::kFloat64) {
45 DCHECK_EQ(0, constant.ToFloat64().AsUint64());
46 return Immediate(0);
47 }
48 if (RelocInfo::IsWasmReference(constant.rmode())) {
49 return Immediate(constant.ToInt32(), constant.rmode());
50 }
51 return Immediate(constant.ToInt32());
52 }
53
  Operand ToOperand(InstructionOperand* op, int extra = 0) {
55 DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
56 return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
57 }
58
  Operand SlotToOperand(int slot_index, int extra = 0) {
60 FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
61 return Operand(offset.from_stack_pointer() ? rsp : rbp,
62 offset.offset() + extra);
63 }
64
  static size_t NextOffset(size_t* offset) {
66 size_t i = *offset;
67 (*offset)++;
68 return i;
69 }
70
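  // Maps a scaled addressing mode (e.g. kMode_MR4) to its ScaleFactor by
  // taking its distance from the first mode of the group (e.g. kMode_MR1).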
  static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
72 STATIC_ASSERT(0 == static_cast<int>(times_1));
73 STATIC_ASSERT(1 == static_cast<int>(times_2));
74 STATIC_ASSERT(2 == static_cast<int>(times_4));
75 STATIC_ASSERT(3 == static_cast<int>(times_8));
76 int scale = static_cast<int>(mode - one);
77 DCHECK(scale >= 0 && scale < 4);
78 return static_cast<ScaleFactor>(scale);
79 }
80
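  // Decodes the addressing mode from the instruction opcode and consumes the
  // corresponding inputs (base, index, displacement) starting at *offset,
  // advancing *offset past them.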
  Operand MemoryOperand(size_t* offset) {
82 AddressingMode mode = AddressingModeField::decode(instr_->opcode());
83 switch (mode) {
84 case kMode_MR: {
85 Register base = InputRegister(NextOffset(offset));
86 int32_t disp = 0;
87 return Operand(base, disp);
88 }
89 case kMode_MRI: {
90 Register base = InputRegister(NextOffset(offset));
91 int32_t disp = InputInt32(NextOffset(offset));
92 return Operand(base, disp);
93 }
94 case kMode_MR1:
95 case kMode_MR2:
96 case kMode_MR4:
97 case kMode_MR8: {
98 Register base = InputRegister(NextOffset(offset));
99 Register index = InputRegister(NextOffset(offset));
100 ScaleFactor scale = ScaleFor(kMode_MR1, mode);
101 int32_t disp = 0;
102 return Operand(base, index, scale, disp);
103 }
104 case kMode_MR1I:
105 case kMode_MR2I:
106 case kMode_MR4I:
107 case kMode_MR8I: {
108 Register base = InputRegister(NextOffset(offset));
109 Register index = InputRegister(NextOffset(offset));
110 ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
111 int32_t disp = InputInt32(NextOffset(offset));
112 return Operand(base, index, scale, disp);
113 }
114 case kMode_M1: {
115 Register base = InputRegister(NextOffset(offset));
116 int32_t disp = 0;
117 return Operand(base, disp);
118 }
119 case kMode_M2:
        UNREACHABLE();  // Should use kMode_MR with more compact encoding instead
121 return Operand(no_reg, 0);
122 case kMode_M4:
123 case kMode_M8: {
124 Register index = InputRegister(NextOffset(offset));
125 ScaleFactor scale = ScaleFor(kMode_M1, mode);
126 int32_t disp = 0;
127 return Operand(index, scale, disp);
128 }
129 case kMode_M1I:
130 case kMode_M2I:
131 case kMode_M4I:
132 case kMode_M8I: {
133 Register index = InputRegister(NextOffset(offset));
134 ScaleFactor scale = ScaleFor(kMode_M1I, mode);
135 int32_t disp = InputInt32(NextOffset(offset));
136 return Operand(index, scale, disp);
137 }
138 case kMode_Root: {
139 Register base = kRootRegister;
140 int32_t disp = InputInt32(NextOffset(offset));
141 return Operand(base, disp);
142 }
143 case kMode_None:
144 UNREACHABLE();
145 }
146 UNREACHABLE();
147 }
148
  Operand MemoryOperand(size_t first_input = 0) {
150 return MemoryOperand(&first_input);
151 }
152 };
153
154
155 namespace {
156
bool HasImmediateInput(Instruction* instr, size_t index) {
158 return instr->InputAt(index)->IsImmediate();
159 }
160
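// Out-of-line code that materializes a quiet NaN in the result register by
// computing 0.0f / 0.0f.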
161 class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
162 public:
  OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
164 : OutOfLineCode(gen), result_(result) {}
165
  void Generate() final {
167 __ Xorps(result_, result_);
168 __ Divss(result_, result_);
169 }
170
171 private:
172 XMMRegister const result_;
173 };
174
175 class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
176 public:
  OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
178 : OutOfLineCode(gen), result_(result) {}
179
  void Generate() final {
181 __ Xorpd(result_, result_);
182 __ Divsd(result_, result_);
183 }
184
185 private:
186 XMMRegister const result_;
187 };
188
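// Out-of-line slow path for truncating a double to int32: spills the input to
// the stack, calls the DoubleToI builtin (or the wasm runtime stub), and reads
// the 32-bit result back from the stack.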
189 class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
190 public:
  OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
192 XMMRegister input, StubCallMode stub_mode,
193 UnwindingInfoWriter* unwinding_info_writer)
194 : OutOfLineCode(gen),
195 result_(result),
196 input_(input),
197 stub_mode_(stub_mode),
198 unwinding_info_writer_(unwinding_info_writer),
199 isolate_(gen->isolate()),
200 zone_(gen->zone()) {}
201
  void Generate() final {
203 __ subp(rsp, Immediate(kDoubleSize));
204 unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
205 kDoubleSize);
206 __ Movsd(MemOperand(rsp, 0), input_);
207 if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
208 // A direct call to a wasm runtime stub defined in this module.
209 // Just encode the stub index. This will be patched at relocation.
210 __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
211 } else {
212 __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
213 }
214 __ movl(result_, MemOperand(rsp, 0));
215 __ addp(rsp, Immediate(kDoubleSize));
216 unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
217 -kDoubleSize);
218 }
219
220 private:
221 Register const result_;
222 XMMRegister const input_;
223 StubCallMode stub_mode_;
224 UnwindingInfoWriter* const unwinding_info_writer_;
225 Isolate* isolate_;
226 Zone* zone_;
227 };
228
229
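// Out-of-line write barrier: exits early for Smi values (when the mode allows)
// and for values whose page is not interesting; otherwise computes the slot
// address and calls the RecordWrite stub.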
230 class OutOfLineRecordWrite final : public OutOfLineCode {
231 public:
  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
233 Register value, Register scratch0, Register scratch1,
234 RecordWriteMode mode)
235 : OutOfLineCode(gen),
236 object_(object),
237 operand_(operand),
238 value_(value),
239 scratch0_(scratch0),
240 scratch1_(scratch1),
241 mode_(mode),
242 zone_(gen->zone()) {}
243
  void Generate() final {
245 if (mode_ > RecordWriteMode::kValueIsPointer) {
246 __ JumpIfSmi(value_, exit());
247 }
248 __ CheckPageFlag(value_, scratch0_,
249 MemoryChunk::kPointersToHereAreInterestingMask, zero,
250 exit());
251 __ leap(scratch1_, operand_);
252
253 RememberedSetAction const remembered_set_action =
254 mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
255 : OMIT_REMEMBERED_SET;
256 SaveFPRegsMode const save_fp_mode =
257 frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
258
259 __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
260 save_fp_mode);
261 }
262
263 private:
264 Register const object_;
265 Operand const operand_;
266 Register const value_;
267 Register const scratch0_;
268 Register const scratch1_;
269 RecordWriteMode const mode_;
270 Zone* zone_;
271 };
272
273 class WasmOutOfLineTrap : public OutOfLineCode {
274 public:
  WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
276 : OutOfLineCode(gen), gen_(gen), instr_(instr) {}
277
  void Generate() override {
279 X64OperandConverter i(gen_, instr_);
280 TrapId trap_id =
281 static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
282 GenerateWithTrapId(trap_id);
283 }
284
285 protected:
286 CodeGenerator* gen_;
287
  void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }
289
290 private:
  void GenerateCallToTrap(TrapId trap_id) {
292 if (!gen_->wasm_runtime_exception_support()) {
293 // We cannot test calls to the runtime in cctest/test-run-wasm.
294 // Therefore we emit a call to C here instead of a call to the runtime.
295 __ PrepareCallCFunction(0);
296 __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
297 0);
298 __ LeaveFrame(StackFrame::WASM_COMPILED);
299 auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
300 size_t pop_size = call_descriptor->StackParameterCount() * kPointerSize;
      // Use rcx as a scratch register; we return immediately anyway.
302 __ Ret(static_cast<int>(pop_size), rcx);
303 } else {
304 gen_->AssembleSourcePosition(instr_);
305 // A direct call to a wasm runtime stub defined in this module.
306 // Just encode the stub index. This will be patched at relocation.
307 __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
308 ReferenceMap* reference_map =
309 new (gen_->zone()) ReferenceMap(gen_->zone());
310 gen_->RecordSafepoint(reference_map, Safepoint::kSimple, 0,
311 Safepoint::kNoLazyDeopt);
312 __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
313 }
314 }
315
316 Instruction* instr_;
317 };
318
319 class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
320 public:
  WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
322 : WasmOutOfLineTrap(gen, instr), pc_(pc) {}
323
  void Generate() final {
325 gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
326 GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
327 }
328
329 private:
330 int pc_;
331 };
332
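// For protected (trap-handler based) memory accesses, registers an
// out-of-line trap that reports kTrapMemOutOfBounds for the given pc.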
void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
334 InstructionCode opcode, Instruction* instr,
335 X64OperandConverter& i, int pc) {
336 const MemoryAccessMode access_mode =
337 static_cast<MemoryAccessMode>(MiscField::decode(opcode));
338 if (access_mode == kMemoryAccessProtected) {
339 new (zone) WasmProtectedInstructionTrap(codegen, pc, instr);
340 }
341 }
342
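// For poisoned loads, masks the loaded value with the speculation poison
// register so it is neutralized on mis-speculated paths.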
void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
344 InstructionCode opcode, Instruction* instr,
345 X64OperandConverter& i) {
346 const MemoryAccessMode access_mode =
347 static_cast<MemoryAccessMode>(MiscField::decode(opcode));
348 if (access_mode == kMemoryAccessPoisoned) {
349 Register value = i.OutputRegister();
350 codegen->tasm()->andq(value, kSpeculationPoisonRegister);
351 }
352 }
353
354 } // namespace
355
356
357 #define ASSEMBLE_UNOP(asm_instr) \
358 do { \
359 if (instr->Output()->IsRegister()) { \
360 __ asm_instr(i.OutputRegister()); \
361 } else { \
362 __ asm_instr(i.OutputOperand()); \
363 } \
364 } while (0)
365
366 #define ASSEMBLE_BINOP(asm_instr) \
367 do { \
368 if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
369 size_t index = 1; \
370 Operand right = i.MemoryOperand(&index); \
371 __ asm_instr(i.InputRegister(0), right); \
372 } else { \
373 if (HasImmediateInput(instr, 1)) { \
374 if (instr->InputAt(0)->IsRegister()) { \
375 __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
376 } else { \
377 __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
378 } \
379 } else { \
380 if (instr->InputAt(1)->IsRegister()) { \
381 __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
382 } else { \
383 __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
384 } \
385 } \
386 } \
387 } while (0)
388
389 #define ASSEMBLE_COMPARE(asm_instr) \
390 do { \
391 if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
392 size_t index = 0; \
393 Operand left = i.MemoryOperand(&index); \
394 if (HasImmediateInput(instr, index)) { \
395 __ asm_instr(left, i.InputImmediate(index)); \
396 } else { \
397 __ asm_instr(left, i.InputRegister(index)); \
398 } \
399 } else { \
400 if (HasImmediateInput(instr, 1)) { \
401 if (instr->InputAt(0)->IsRegister()) { \
402 __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
403 } else { \
404 __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
405 } \
406 } else { \
407 if (instr->InputAt(1)->IsRegister()) { \
408 __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
409 } else { \
410 __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
411 } \
412 } \
413 } \
414 } while (0)
415
416 #define ASSEMBLE_MULT(asm_instr) \
417 do { \
418 if (HasImmediateInput(instr, 1)) { \
419 if (instr->InputAt(0)->IsRegister()) { \
420 __ asm_instr(i.OutputRegister(), i.InputRegister(0), \
421 i.InputImmediate(1)); \
422 } else { \
423 __ asm_instr(i.OutputRegister(), i.InputOperand(0), \
424 i.InputImmediate(1)); \
425 } \
426 } else { \
427 if (instr->InputAt(1)->IsRegister()) { \
428 __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
429 } else { \
430 __ asm_instr(i.OutputRegister(), i.InputOperand(1)); \
431 } \
432 } \
433 } while (0)
434
435
436 #define ASSEMBLE_SHIFT(asm_instr, width) \
437 do { \
438 if (HasImmediateInput(instr, 1)) { \
439 if (instr->Output()->IsRegister()) { \
440 __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
441 } else { \
442 __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1))); \
443 } \
444 } else { \
445 if (instr->Output()->IsRegister()) { \
446 __ asm_instr##_cl(i.OutputRegister()); \
447 } else { \
448 __ asm_instr##_cl(i.OutputOperand()); \
449 } \
450 } \
451 } while (0)
452
453
454 #define ASSEMBLE_MOVX(asm_instr) \
455 do { \
456 if (instr->addressing_mode() != kMode_None) { \
457 __ asm_instr(i.OutputRegister(), i.MemoryOperand()); \
458 } else if (instr->InputAt(0)->IsRegister()) { \
459 __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
460 } else { \
461 __ asm_instr(i.OutputRegister(), i.InputOperand(0)); \
462 } \
463 } while (0)
464
465 #define ASSEMBLE_SSE_BINOP(asm_instr) \
466 do { \
467 if (instr->InputAt(1)->IsFPRegister()) { \
468 __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
469 } else { \
470 __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1)); \
471 } \
472 } while (0)
473
474 #define ASSEMBLE_SSE_UNOP(asm_instr) \
475 do { \
476 if (instr->InputAt(0)->IsFPRegister()) { \
477 __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
478 } else { \
479 __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0)); \
480 } \
481 } while (0)
482
483 #define ASSEMBLE_AVX_BINOP(asm_instr) \
484 do { \
485 CpuFeatureScope avx_scope(tasm(), AVX); \
486 if (instr->InputAt(1)->IsFPRegister()) { \
487 __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
488 i.InputDoubleRegister(1)); \
489 } else { \
490 __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
491 i.InputOperand(1)); \
492 } \
493 } while (0)
494
495 #define ASSEMBLE_IEEE754_BINOP(name) \
496 do { \
497 __ PrepareCallCFunction(2); \
498 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
499 } while (false)
500
501 #define ASSEMBLE_IEEE754_UNOP(name) \
502 do { \
503 __ PrepareCallCFunction(1); \
504 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
505 } while (false)
506
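// Implements a read-modify-write atomic as a compare-and-swap loop: load the
// old value into rax, apply the operation into a temp register, then lock
// cmpxchg and retry until no other write intervened.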
507 #define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
508 do { \
509 Label binop; \
510 __ bind(&binop); \
511 __ mov_inst(rax, i.MemoryOperand(1)); \
512 __ movl(i.TempRegister(0), rax); \
513 __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \
514 __ lock(); \
515 __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \
516 __ j(not_equal, &binop); \
517 } while (false)
518
519 #define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
520 do { \
521 Label binop; \
522 __ bind(&binop); \
523 __ mov_inst(rax, i.MemoryOperand(1)); \
524 __ movq(i.TempRegister(0), rax); \
525 __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \
526 __ lock(); \
527 __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \
528 __ j(not_equal, &binop); \
529 } while (false)
530
void CodeGenerator::AssembleDeconstructFrame() {
532 unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
533 __ movq(rsp, rbp);
534 __ popq(rbp);
535 }
536
void CodeGenerator::AssemblePrepareTailCall() {
538 if (frame_access_state()->has_frame()) {
539 __ movq(rbp, MemOperand(rbp, 0));
540 }
541 frame_access_state()->SetFrameAccessToSP();
542 }
543
void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
545 Register scratch1,
546 Register scratch2,
547 Register scratch3) {
548 DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
549 Label done;
550
551 // Check if current frame is an arguments adaptor frame.
552 __ cmpp(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
553 Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
554 __ j(not_equal, &done, Label::kNear);
555
  // Load the arguments count from the current arguments adaptor frame (note
  // that it does not include the receiver).
558 Register caller_args_count_reg = scratch1;
559 __ SmiUntag(caller_args_count_reg,
560 Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset));
561
562 ParameterCount callee_args_count(args_reg);
563 __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
564 scratch3);
565 __ bind(&done);
566 }
567
568 namespace {
569
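// Grows or (optionally) shrinks the stack so that exactly new_slot_above_sp
// slots lie above the stack pointer before the tail call, keeping the frame
// access state's SP delta in sync.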
void AdjustStackPointerForTailCall(Assembler* assembler,
571 FrameAccessState* state,
572 int new_slot_above_sp,
573 bool allow_shrinkage = true) {
574 int current_sp_offset = state->GetSPToFPSlotCount() +
575 StandardFrameConstants::kFixedSlotCountAboveFp;
576 int stack_slot_delta = new_slot_above_sp - current_sp_offset;
577 if (stack_slot_delta > 0) {
578 assembler->subq(rsp, Immediate(stack_slot_delta * kPointerSize));
579 state->IncreaseSPDelta(stack_slot_delta);
580 } else if (allow_shrinkage && stack_slot_delta < 0) {
581 assembler->addq(rsp, Immediate(-stack_slot_delta * kPointerSize));
582 state->IncreaseSPDelta(stack_slot_delta);
583 }
584 }
585
586 } // namespace
587
void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
589 int first_unused_stack_slot) {
590 CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
591 ZoneVector<MoveOperands*> pushes(zone());
592 GetPushCompatibleMoves(instr, flags, &pushes);
593
594 if (!pushes.empty() &&
595 (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
596 first_unused_stack_slot)) {
597 X64OperandConverter g(this, instr);
598 for (auto move : pushes) {
599 LocationOperand destination_location(
600 LocationOperand::cast(move->destination()));
601 InstructionOperand source(move->source());
602 AdjustStackPointerForTailCall(tasm(), frame_access_state(),
603 destination_location.index());
604 if (source.IsStackSlot()) {
605 LocationOperand source_location(LocationOperand::cast(source));
606 __ Push(g.SlotToOperand(source_location.index()));
607 } else if (source.IsRegister()) {
608 LocationOperand source_location(LocationOperand::cast(source));
609 __ Push(source_location.GetRegister());
610 } else if (source.IsImmediate()) {
611 __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
612 } else {
        // Pushes of non-scalar data types are not supported.
614 UNIMPLEMENTED();
615 }
616 frame_access_state()->IncreaseSPDelta(1);
617 move->Eliminate();
618 }
619 }
620 AdjustStackPointerForTailCall(tasm(), frame_access_state(),
621 first_unused_stack_slot, false);
622 }
623
void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
625 int first_unused_stack_slot) {
626 AdjustStackPointerForTailCall(tasm(), frame_access_state(),
627 first_unused_stack_slot);
628 }
629
630 // Check that {kJavaScriptCallCodeStartRegister} is correct.
void CodeGenerator::AssembleCodeStartRegisterCheck() {
632 __ ComputeCodeStartAddress(rbx);
633 __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
634 __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
635 }
636
637 // Check if the code object is marked for deoptimization. If it is, then it
638 // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
639 // to:
640 // 1. read from memory the word that contains that bit, which can be found in
641 // the flags in the referenced {CodeDataContainer} object;
642 // 2. test kMarkedForDeoptimizationBit in those flags; and
643 // 3. if it is not zero then it jumps to the builtin.
void CodeGenerator::BailoutIfDeoptimized() {
645 int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
646 __ movp(rbx, Operand(kJavaScriptCallCodeStartRegister, offset));
647 __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
648 Immediate(1 << Code::kMarkedForDeoptimizationBit));
649 // Ensure we're not serializing (otherwise we'd need to use an indirection to
650 // access the builtin below).
651 DCHECK(!isolate()->ShouldLoadConstantsFromRootList());
652 Handle<Code> code = isolate()->builtins()->builtin_handle(
653 Builtins::kCompileLazyDeoptimizedCode);
654 __ j(not_zero, code, RelocInfo::CODE_TARGET);
655 }
656
void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
658 // Set a mask which has all bits set in the normal case, but has all
659 // bits cleared if we are speculatively executing the wrong PC.
660 __ ComputeCodeStartAddress(rbx);
661 __ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister);
662 __ cmpp(kJavaScriptCallCodeStartRegister, rbx);
663 __ movp(rbx, Immediate(-1));
664 __ cmovq(equal, kSpeculationPoisonRegister, rbx);
665 }
666
void CodeGenerator::AssembleRegisterArgumentPoisoning() {
668 __ andq(kJSFunctionRegister, kSpeculationPoisonRegister);
669 __ andq(kContextRegister, kSpeculationPoisonRegister);
670 __ andq(rsp, kSpeculationPoisonRegister);
671 }
672
673 // Assembles an instruction after register allocation, producing machine code.
CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
675 Instruction* instr) {
676 X64OperandConverter i(this, instr);
677 InstructionCode opcode = instr->opcode();
678 ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
679 switch (arch_opcode) {
680 case kArchCallCodeObject: {
681 if (HasImmediateInput(instr, 0)) {
682 Handle<Code> code = i.InputCode(0);
683 __ Call(code, RelocInfo::CODE_TARGET);
684 } else {
685 Register reg = i.InputRegister(0);
686 DCHECK_IMPLIES(
687 HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
688 reg == kJavaScriptCallCodeStartRegister);
689 __ addp(reg, Immediate(Code::kHeaderSize - kHeapObjectTag));
690 if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
691 __ RetpolineCall(reg);
692 } else {
693 __ call(reg);
694 }
695 }
696 RecordCallPosition(instr);
697 frame_access_state()->ClearSPDelta();
698 break;
699 }
700 case kArchCallWasmFunction: {
701 if (HasImmediateInput(instr, 0)) {
702 Constant constant = i.ToConstant(instr->InputAt(0));
703 Address wasm_code = static_cast<Address>(constant.ToInt64());
704 if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
705 __ near_call(wasm_code, constant.rmode());
706 } else {
707 if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
708 __ RetpolineCall(wasm_code, constant.rmode());
709 } else {
710 __ Call(wasm_code, constant.rmode());
711 }
712 }
713 } else {
714 Register reg = i.InputRegister(0);
715 if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
716 __ RetpolineCall(reg);
717 } else {
718 __ call(reg);
719 }
720 }
721 RecordCallPosition(instr);
722 frame_access_state()->ClearSPDelta();
723 break;
724 }
725 case kArchTailCallCodeObjectFromJSFunction:
726 case kArchTailCallCodeObject: {
727 if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
728 AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
729 i.TempRegister(0), i.TempRegister(1),
730 i.TempRegister(2));
731 }
732 if (HasImmediateInput(instr, 0)) {
733 Handle<Code> code = i.InputCode(0);
734 __ Jump(code, RelocInfo::CODE_TARGET);
735 } else {
736 Register reg = i.InputRegister(0);
737 DCHECK_IMPLIES(
738 HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
739 reg == kJavaScriptCallCodeStartRegister);
740 __ addp(reg, Immediate(Code::kHeaderSize - kHeapObjectTag));
741 if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
742 __ RetpolineJump(reg);
743 } else {
744 __ jmp(reg);
745 }
746 }
747 unwinding_info_writer_.MarkBlockWillExit();
748 frame_access_state()->ClearSPDelta();
749 frame_access_state()->SetFrameAccessToDefault();
750 break;
751 }
752 case kArchTailCallWasm: {
753 if (HasImmediateInput(instr, 0)) {
754 Constant constant = i.ToConstant(instr->InputAt(0));
755 Address wasm_code = static_cast<Address>(constant.ToInt64());
756 if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
757 __ near_jmp(wasm_code, constant.rmode());
758 } else {
759 __ Move(kScratchRegister, wasm_code, constant.rmode());
760 __ jmp(kScratchRegister);
761 }
762 } else {
763 Register reg = i.InputRegister(0);
764 if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
765 __ RetpolineJump(reg);
766 } else {
767 __ jmp(reg);
768 }
769 }
770 unwinding_info_writer_.MarkBlockWillExit();
771 frame_access_state()->ClearSPDelta();
772 frame_access_state()->SetFrameAccessToDefault();
773 break;
774 }
775 case kArchTailCallAddress: {
776 CHECK(!HasImmediateInput(instr, 0));
777 Register reg = i.InputRegister(0);
778 DCHECK_IMPLIES(
779 HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
780 reg == kJavaScriptCallCodeStartRegister);
781 if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
782 __ RetpolineJump(reg);
783 } else {
784 __ jmp(reg);
785 }
786 unwinding_info_writer_.MarkBlockWillExit();
787 frame_access_state()->ClearSPDelta();
788 frame_access_state()->SetFrameAccessToDefault();
789 break;
790 }
791 case kArchCallJSFunction: {
792 Register func = i.InputRegister(0);
793 if (FLAG_debug_code) {
794 // Check the function's context matches the context argument.
795 __ cmpp(rsi, FieldOperand(func, JSFunction::kContextOffset));
796 __ Assert(equal, AbortReason::kWrongFunctionContext);
797 }
798 static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
799 __ movp(rcx, FieldOperand(func, JSFunction::kCodeOffset));
800 __ addp(rcx, Immediate(Code::kHeaderSize - kHeapObjectTag));
801 __ call(rcx);
802 frame_access_state()->ClearSPDelta();
803 RecordCallPosition(instr);
804 break;
805 }
806 case kArchPrepareCallCFunction: {
807 // Frame alignment requires using FP-relative frame addressing.
808 frame_access_state()->SetFrameAccessToFP();
809 int const num_parameters = MiscField::decode(instr->opcode());
810 __ PrepareCallCFunction(num_parameters);
811 break;
812 }
813 case kArchSaveCallerRegisters: {
814 fp_mode_ =
815 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
816 DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
817 // kReturnRegister0 should have been saved before entering the stub.
818 int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
819 DCHECK_EQ(0, bytes % kPointerSize);
820 DCHECK_EQ(0, frame_access_state()->sp_delta());
821 frame_access_state()->IncreaseSPDelta(bytes / kPointerSize);
822 DCHECK(!caller_registers_saved_);
823 caller_registers_saved_ = true;
824 break;
825 }
826 case kArchRestoreCallerRegisters: {
827 DCHECK(fp_mode_ ==
828 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
829 DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
830 // Don't overwrite the returned value.
831 int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
832 frame_access_state()->IncreaseSPDelta(-(bytes / kPointerSize));
833 DCHECK_EQ(0, frame_access_state()->sp_delta());
834 DCHECK(caller_registers_saved_);
835 caller_registers_saved_ = false;
836 break;
837 }
838 case kArchPrepareTailCall:
839 AssemblePrepareTailCall();
840 break;
841 case kArchCallCFunction: {
842 int const num_parameters = MiscField::decode(instr->opcode());
843 if (HasImmediateInput(instr, 0)) {
844 ExternalReference ref = i.InputExternalReference(0);
845 __ CallCFunction(ref, num_parameters);
846 } else {
847 Register func = i.InputRegister(0);
848 __ CallCFunction(func, num_parameters);
849 }
850 frame_access_state()->SetFrameAccessToDefault();
851 // Ideally, we should decrement SP delta to match the change of stack
852 // pointer in CallCFunction. However, for certain architectures (e.g.
      // ARM), there may be a stricter alignment requirement, causing the old SP
      // to be saved on the stack. In those cases, we cannot calculate the SP
855 // delta statically.
856 frame_access_state()->ClearSPDelta();
857 if (caller_registers_saved_) {
858 // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
859 // Here, we assume the sequence to be:
860 // kArchSaveCallerRegisters;
861 // kArchCallCFunction;
862 // kArchRestoreCallerRegisters;
863 int bytes =
864 __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
865 frame_access_state()->IncreaseSPDelta(bytes / kPointerSize);
866 }
867 // TODO(tebbi): Do we need an lfence here?
868 break;
869 }
870 case kArchJmp:
871 AssembleArchJump(i.InputRpo(0));
872 break;
873 case kArchBinarySearchSwitch:
874 AssembleArchBinarySearchSwitch(instr);
875 break;
876 case kArchLookupSwitch:
877 AssembleArchLookupSwitch(instr);
878 break;
879 case kArchTableSwitch:
880 AssembleArchTableSwitch(instr);
881 break;
882 case kArchComment:
883 __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
884 break;
885 case kArchDebugAbort:
886 DCHECK(i.InputRegister(0) == rdx);
887 if (!frame_access_state()->has_frame()) {
888 // We don't actually want to generate a pile of code for this, so just
889 // claim there is a stack frame, without generating one.
890 FrameScope scope(tasm(), StackFrame::NONE);
891 __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
892 RelocInfo::CODE_TARGET);
893 } else {
894 __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
895 RelocInfo::CODE_TARGET);
896 }
897 __ int3();
898 unwinding_info_writer_.MarkBlockWillExit();
899 break;
900 case kArchDebugBreak:
901 __ int3();
902 break;
903 case kArchThrowTerminator:
904 unwinding_info_writer_.MarkBlockWillExit();
905 break;
906 case kArchNop:
907 // don't emit code for nops.
908 break;
909 case kArchDeoptimize: {
910 int deopt_state_id =
911 BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
912 CodeGenResult result =
913 AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
914 if (result != kSuccess) return result;
915 unwinding_info_writer_.MarkBlockWillExit();
916 break;
917 }
918 case kArchRet:
919 AssembleReturn(instr->InputAt(0));
920 break;
921 case kArchStackPointer:
922 __ movq(i.OutputRegister(), rsp);
923 break;
924 case kArchFramePointer:
925 __ movq(i.OutputRegister(), rbp);
926 break;
927 case kArchParentFramePointer:
928 if (frame_access_state()->has_frame()) {
929 __ movq(i.OutputRegister(), Operand(rbp, 0));
930 } else {
931 __ movq(i.OutputRegister(), rbp);
932 }
933 break;
934 case kArchTruncateDoubleToI: {
935 auto result = i.OutputRegister();
936 auto input = i.InputDoubleRegister(0);
937 auto ool = new (zone()) OutOfLineTruncateDoubleToI(
938 this, result, input, DetermineStubCallMode(),
939 &unwinding_info_writer_);
      // We use Cvttsd2siq instead of Cvttsd2si for performance reasons. The
941 // use of Cvttsd2siq requires the movl below to avoid sign extension.
942 __ Cvttsd2siq(result, input);
943 __ cmpq(result, Immediate(1));
944 __ j(overflow, ool->entry());
945 __ bind(ool->exit());
946 __ movl(result, result);
947 break;
948 }
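    // A store followed by the generational write barrier; the barrier itself
    // runs out of line and is only entered when the object's page flags say
    // pointers from here are interesting.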
949 case kArchStoreWithWriteBarrier: {
950 RecordWriteMode mode =
951 static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
952 Register object = i.InputRegister(0);
953 size_t index = 0;
954 Operand operand = i.MemoryOperand(&index);
955 Register value = i.InputRegister(index);
956 Register scratch0 = i.TempRegister(0);
957 Register scratch1 = i.TempRegister(1);
958 auto ool = new (zone()) OutOfLineRecordWrite(this, object, operand, value,
959 scratch0, scratch1, mode);
960 __ movp(operand, value);
961 __ CheckPageFlag(object, scratch0,
962 MemoryChunk::kPointersFromHereAreInterestingMask,
963 not_zero, ool->entry());
964 __ bind(ool->exit());
965 break;
966 }
967 case kArchWordPoisonOnSpeculation:
968 DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
969 __ andq(i.InputRegister(0), kSpeculationPoisonRegister);
970 break;
971 case kLFence:
972 __ lfence();
973 break;
974 case kArchStackSlot: {
975 FrameOffset offset =
976 frame_access_state()->GetFrameOffset(i.InputInt32(0));
977 Register base = offset.from_stack_pointer() ? rsp : rbp;
978 __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
979 break;
980 }
981 case kIeee754Float64Acos:
982 ASSEMBLE_IEEE754_UNOP(acos);
983 break;
984 case kIeee754Float64Acosh:
985 ASSEMBLE_IEEE754_UNOP(acosh);
986 break;
987 case kIeee754Float64Asin:
988 ASSEMBLE_IEEE754_UNOP(asin);
989 break;
990 case kIeee754Float64Asinh:
991 ASSEMBLE_IEEE754_UNOP(asinh);
992 break;
993 case kIeee754Float64Atan:
994 ASSEMBLE_IEEE754_UNOP(atan);
995 break;
996 case kIeee754Float64Atanh:
997 ASSEMBLE_IEEE754_UNOP(atanh);
998 break;
999 case kIeee754Float64Atan2:
1000 ASSEMBLE_IEEE754_BINOP(atan2);
1001 break;
1002 case kIeee754Float64Cbrt:
1003 ASSEMBLE_IEEE754_UNOP(cbrt);
1004 break;
1005 case kIeee754Float64Cos:
1006 ASSEMBLE_IEEE754_UNOP(cos);
1007 break;
1008 case kIeee754Float64Cosh:
1009 ASSEMBLE_IEEE754_UNOP(cosh);
1010 break;
1011 case kIeee754Float64Exp:
1012 ASSEMBLE_IEEE754_UNOP(exp);
1013 break;
1014 case kIeee754Float64Expm1:
1015 ASSEMBLE_IEEE754_UNOP(expm1);
1016 break;
1017 case kIeee754Float64Log:
1018 ASSEMBLE_IEEE754_UNOP(log);
1019 break;
1020 case kIeee754Float64Log1p:
1021 ASSEMBLE_IEEE754_UNOP(log1p);
1022 break;
1023 case kIeee754Float64Log2:
1024 ASSEMBLE_IEEE754_UNOP(log2);
1025 break;
1026 case kIeee754Float64Log10:
1027 ASSEMBLE_IEEE754_UNOP(log10);
1028 break;
1029 case kIeee754Float64Pow: {
1030 // TODO(bmeurer): Improve integration of the stub.
1031 __ Movsd(xmm2, xmm0);
1032 __ Call(BUILTIN_CODE(isolate(), MathPowInternal), RelocInfo::CODE_TARGET);
1033 __ Movsd(xmm0, xmm3);
1034 break;
1035 }
1036 case kIeee754Float64Sin:
1037 ASSEMBLE_IEEE754_UNOP(sin);
1038 break;
1039 case kIeee754Float64Sinh:
1040 ASSEMBLE_IEEE754_UNOP(sinh);
1041 break;
1042 case kIeee754Float64Tan:
1043 ASSEMBLE_IEEE754_UNOP(tan);
1044 break;
1045 case kIeee754Float64Tanh:
1046 ASSEMBLE_IEEE754_UNOP(tanh);
1047 break;
1048 case kX64Add32:
1049 ASSEMBLE_BINOP(addl);
1050 break;
1051 case kX64Add:
1052 ASSEMBLE_BINOP(addq);
1053 break;
1054 case kX64Sub32:
1055 ASSEMBLE_BINOP(subl);
1056 break;
1057 case kX64Sub:
1058 ASSEMBLE_BINOP(subq);
1059 break;
1060 case kX64And32:
1061 ASSEMBLE_BINOP(andl);
1062 break;
1063 case kX64And:
1064 ASSEMBLE_BINOP(andq);
1065 break;
1066 case kX64Cmp8:
1067 ASSEMBLE_COMPARE(cmpb);
1068 break;
1069 case kX64Cmp16:
1070 ASSEMBLE_COMPARE(cmpw);
1071 break;
1072 case kX64Cmp32:
1073 ASSEMBLE_COMPARE(cmpl);
1074 break;
1075 case kX64Cmp:
1076 ASSEMBLE_COMPARE(cmpq);
1077 break;
1078 case kX64Test8:
1079 ASSEMBLE_COMPARE(testb);
1080 break;
1081 case kX64Test16:
1082 ASSEMBLE_COMPARE(testw);
1083 break;
1084 case kX64Test32:
1085 ASSEMBLE_COMPARE(testl);
1086 break;
1087 case kX64Test:
1088 ASSEMBLE_COMPARE(testq);
1089 break;
1090 case kX64Imul32:
1091 ASSEMBLE_MULT(imull);
1092 break;
1093 case kX64Imul:
1094 ASSEMBLE_MULT(imulq);
1095 break;
1096 case kX64ImulHigh32:
1097 if (instr->InputAt(1)->IsRegister()) {
1098 __ imull(i.InputRegister(1));
1099 } else {
1100 __ imull(i.InputOperand(1));
1101 }
1102 break;
1103 case kX64UmulHigh32:
1104 if (instr->InputAt(1)->IsRegister()) {
1105 __ mull(i.InputRegister(1));
1106 } else {
1107 __ mull(i.InputOperand(1));
1108 }
1109 break;
1110 case kX64Idiv32:
1111 __ cdq();
1112 __ idivl(i.InputRegister(1));
1113 break;
1114 case kX64Idiv:
1115 __ cqo();
1116 __ idivq(i.InputRegister(1));
1117 break;
1118 case kX64Udiv32:
1119 __ xorl(rdx, rdx);
1120 __ divl(i.InputRegister(1));
1121 break;
1122 case kX64Udiv:
1123 __ xorq(rdx, rdx);
1124 __ divq(i.InputRegister(1));
1125 break;
1126 case kX64Not:
1127 ASSEMBLE_UNOP(notq);
1128 break;
1129 case kX64Not32:
1130 ASSEMBLE_UNOP(notl);
1131 break;
1132 case kX64Neg:
1133 ASSEMBLE_UNOP(negq);
1134 break;
1135 case kX64Neg32:
1136 ASSEMBLE_UNOP(negl);
1137 break;
1138 case kX64Or32:
1139 ASSEMBLE_BINOP(orl);
1140 break;
1141 case kX64Or:
1142 ASSEMBLE_BINOP(orq);
1143 break;
1144 case kX64Xor32:
1145 ASSEMBLE_BINOP(xorl);
1146 break;
1147 case kX64Xor:
1148 ASSEMBLE_BINOP(xorq);
1149 break;
1150 case kX64Shl32:
1151 ASSEMBLE_SHIFT(shll, 5);
1152 break;
1153 case kX64Shl:
1154 ASSEMBLE_SHIFT(shlq, 6);
1155 break;
1156 case kX64Shr32:
1157 ASSEMBLE_SHIFT(shrl, 5);
1158 break;
1159 case kX64Shr:
1160 ASSEMBLE_SHIFT(shrq, 6);
1161 break;
1162 case kX64Sar32:
1163 ASSEMBLE_SHIFT(sarl, 5);
1164 break;
1165 case kX64Sar:
1166 ASSEMBLE_SHIFT(sarq, 6);
1167 break;
1168 case kX64Ror32:
1169 ASSEMBLE_SHIFT(rorl, 5);
1170 break;
1171 case kX64Ror:
1172 ASSEMBLE_SHIFT(rorq, 6);
1173 break;
1174 case kX64Lzcnt:
1175 if (instr->InputAt(0)->IsRegister()) {
1176 __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
1177 } else {
1178 __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
1179 }
1180 break;
1181 case kX64Lzcnt32:
1182 if (instr->InputAt(0)->IsRegister()) {
1183 __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
1184 } else {
1185 __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
1186 }
1187 break;
1188 case kX64Tzcnt:
1189 if (instr->InputAt(0)->IsRegister()) {
1190 __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
1191 } else {
1192 __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
1193 }
1194 break;
1195 case kX64Tzcnt32:
1196 if (instr->InputAt(0)->IsRegister()) {
1197 __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
1198 } else {
1199 __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
1200 }
1201 break;
1202 case kX64Popcnt:
1203 if (instr->InputAt(0)->IsRegister()) {
1204 __ Popcntq(i.OutputRegister(), i.InputRegister(0));
1205 } else {
1206 __ Popcntq(i.OutputRegister(), i.InputOperand(0));
1207 }
1208 break;
1209 case kX64Popcnt32:
1210 if (instr->InputAt(0)->IsRegister()) {
1211 __ Popcntl(i.OutputRegister(), i.InputRegister(0));
1212 } else {
1213 __ Popcntl(i.OutputRegister(), i.InputOperand(0));
1214 }
1215 break;
1216 case kX64Bswap:
1217 __ bswapq(i.OutputRegister());
1218 break;
1219 case kX64Bswap32:
1220 __ bswapl(i.OutputRegister());
1221 break;
1222 case kSSEFloat32Cmp:
1223 ASSEMBLE_SSE_BINOP(Ucomiss);
1224 break;
1225 case kSSEFloat32Add:
1226 ASSEMBLE_SSE_BINOP(addss);
1227 break;
1228 case kSSEFloat32Sub:
1229 ASSEMBLE_SSE_BINOP(subss);
1230 break;
1231 case kSSEFloat32Mul:
1232 ASSEMBLE_SSE_BINOP(mulss);
1233 break;
1234 case kSSEFloat32Div:
1235 ASSEMBLE_SSE_BINOP(divss);
1236 // Don't delete this mov. It may improve performance on some CPUs,
1237 // when there is a (v)mulss depending on the result.
1238 __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1239 break;
1240 case kSSEFloat32Abs: {
1241 // TODO(bmeurer): Use RIP relative 128-bit constants.
1242 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1243 __ psrlq(kScratchDoubleReg, 33);
1244 __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
1245 break;
1246 }
1247 case kSSEFloat32Neg: {
1248 // TODO(bmeurer): Use RIP relative 128-bit constants.
1249 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1250 __ psllq(kScratchDoubleReg, 31);
1251 __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
1252 break;
1253 }
1254 case kSSEFloat32Sqrt:
1255 ASSEMBLE_SSE_UNOP(sqrtss);
1256 break;
1257 case kSSEFloat32ToFloat64:
1258 ASSEMBLE_SSE_UNOP(Cvtss2sd);
1259 break;
1260 case kSSEFloat32Round: {
1261 CpuFeatureScope sse_scope(tasm(), SSE4_1);
1262 RoundingMode const mode =
1263 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1264 __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1265 break;
1266 }
1267 case kSSEFloat32ToInt32:
1268 if (instr->InputAt(0)->IsFPRegister()) {
1269 __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
1270 } else {
1271 __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
1272 }
1273 break;
1274 case kSSEFloat32ToUint32: {
1275 if (instr->InputAt(0)->IsFPRegister()) {
1276 __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1277 } else {
1278 __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1279 }
1280 break;
1281 }
1282 case kSSEFloat64Cmp:
1283 ASSEMBLE_SSE_BINOP(Ucomisd);
1284 break;
1285 case kSSEFloat64Add:
1286 ASSEMBLE_SSE_BINOP(addsd);
1287 break;
1288 case kSSEFloat64Sub:
1289 ASSEMBLE_SSE_BINOP(subsd);
1290 break;
1291 case kSSEFloat64Mul:
1292 ASSEMBLE_SSE_BINOP(mulsd);
1293 break;
1294 case kSSEFloat64Div:
1295 ASSEMBLE_SSE_BINOP(divsd);
1296 // Don't delete this mov. It may improve performance on some CPUs,
1297 // when there is a (v)mulsd depending on the result.
1298 __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1299 break;
1300 case kSSEFloat64Mod: {
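      // SSE has no fmod, so compute the remainder with the x87 fprem
      // instruction, passing the operands through the stack.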
1301 __ subq(rsp, Immediate(kDoubleSize));
1302 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1303 kDoubleSize);
1304 // Move values to st(0) and st(1).
1305 __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
1306 __ fld_d(Operand(rsp, 0));
1307 __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
1308 __ fld_d(Operand(rsp, 0));
1309 // Loop while fprem isn't done.
1310 Label mod_loop;
1311 __ bind(&mod_loop);
      // This instruction traps on all kinds of inputs, but we are assuming the
1313 // floating point control word is set to ignore them all.
1314 __ fprem();
      // The following 2 instructions implicitly use rax.
1316 __ fnstsw_ax();
1317 if (CpuFeatures::IsSupported(SAHF)) {
1318 CpuFeatureScope sahf_scope(tasm(), SAHF);
1319 __ sahf();
1320 } else {
1321 __ shrl(rax, Immediate(8));
1322 __ andl(rax, Immediate(0xFF));
1323 __ pushq(rax);
1324 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1325 kPointerSize);
1326 __ popfq();
1327 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1328 -kPointerSize);
1329 }
1330 __ j(parity_even, &mod_loop);
1331 // Move output to stack and clean up.
1332 __ fstp(1);
1333 __ fstp_d(Operand(rsp, 0));
1334 __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
1335 __ addq(rsp, Immediate(kDoubleSize));
1336 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1337 -kDoubleSize);
1338 break;
1339 }
1340 case kSSEFloat32Max: {
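      // If either operand is NaN, the out-of-line code loads a NaN result.
      // When the operands compare equal (e.g. -0 and +0), the sign bit of the
      // first operand decides whether to swap, so that max(-0, +0) is +0.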
1341 Label compare_nan, compare_swap, done_compare;
1342 if (instr->InputAt(1)->IsFPRegister()) {
1343 __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1344 } else {
1345 __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1346 }
1347 auto ool =
1348 new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1349 __ j(parity_even, ool->entry());
1350 __ j(above, &done_compare, Label::kNear);
1351 __ j(below, &compare_swap, Label::kNear);
1352 __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
1353 __ testl(kScratchRegister, Immediate(1));
1354 __ j(zero, &done_compare, Label::kNear);
1355 __ bind(&compare_swap);
1356 if (instr->InputAt(1)->IsFPRegister()) {
1357 __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1358 } else {
1359 __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1360 }
1361 __ bind(&done_compare);
1362 __ bind(ool->exit());
1363 break;
1364 }
1365 case kSSEFloat32Min: {
1366 Label compare_swap, done_compare;
1367 if (instr->InputAt(1)->IsFPRegister()) {
1368 __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1369 } else {
1370 __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1371 }
1372 auto ool =
1373 new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1374 __ j(parity_even, ool->entry());
1375 __ j(below, &done_compare, Label::kNear);
1376 __ j(above, &compare_swap, Label::kNear);
1377 if (instr->InputAt(1)->IsFPRegister()) {
1378 __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
1379 } else {
1380 __ Movss(kScratchDoubleReg, i.InputOperand(1));
1381 __ Movmskps(kScratchRegister, kScratchDoubleReg);
1382 }
1383 __ testl(kScratchRegister, Immediate(1));
1384 __ j(zero, &done_compare, Label::kNear);
1385 __ bind(&compare_swap);
1386 if (instr->InputAt(1)->IsFPRegister()) {
1387 __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1388 } else {
1389 __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1390 }
1391 __ bind(&done_compare);
1392 __ bind(ool->exit());
1393 break;
1394 }
1395 case kSSEFloat64Max: {
1396 Label compare_nan, compare_swap, done_compare;
1397 if (instr->InputAt(1)->IsFPRegister()) {
1398 __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1399 } else {
1400 __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1401 }
1402 auto ool =
1403 new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1404 __ j(parity_even, ool->entry());
1405 __ j(above, &done_compare, Label::kNear);
1406 __ j(below, &compare_swap, Label::kNear);
1407 __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
1408 __ testl(kScratchRegister, Immediate(1));
1409 __ j(zero, &done_compare, Label::kNear);
1410 __ bind(&compare_swap);
1411 if (instr->InputAt(1)->IsFPRegister()) {
1412 __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1413 } else {
1414 __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1415 }
1416 __ bind(&done_compare);
1417 __ bind(ool->exit());
1418 break;
1419 }
1420 case kSSEFloat64Min: {
1421 Label compare_swap, done_compare;
1422 if (instr->InputAt(1)->IsFPRegister()) {
1423 __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1424 } else {
1425 __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1426 }
1427 auto ool =
1428 new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1429 __ j(parity_even, ool->entry());
1430 __ j(below, &done_compare, Label::kNear);
1431 __ j(above, &compare_swap, Label::kNear);
1432 if (instr->InputAt(1)->IsFPRegister()) {
1433 __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
1434 } else {
1435 __ Movsd(kScratchDoubleReg, i.InputOperand(1));
1436 __ Movmskpd(kScratchRegister, kScratchDoubleReg);
1437 }
1438 __ testl(kScratchRegister, Immediate(1));
1439 __ j(zero, &done_compare, Label::kNear);
1440 __ bind(&compare_swap);
1441 if (instr->InputAt(1)->IsFPRegister()) {
1442 __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1443 } else {
1444 __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1445 }
1446 __ bind(&done_compare);
1447 __ bind(ool->exit());
1448 break;
1449 }
1450 case kSSEFloat64Abs: {
1451 // TODO(bmeurer): Use RIP relative 128-bit constants.
1452 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1453 __ psrlq(kScratchDoubleReg, 1);
1454 __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1455 break;
1456 }
1457 case kSSEFloat64Neg: {
1458 // TODO(bmeurer): Use RIP relative 128-bit constants.
1459 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1460 __ psllq(kScratchDoubleReg, 63);
1461 __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1462 break;
1463 }
1464 case kSSEFloat64Sqrt:
1465 ASSEMBLE_SSE_UNOP(Sqrtsd);
1466 break;
1467 case kSSEFloat64Round: {
1468 CpuFeatureScope sse_scope(tasm(), SSE4_1);
1469 RoundingMode const mode =
1470 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1471 __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1472 break;
1473 }
1474 case kSSEFloat64ToFloat32:
1475 ASSEMBLE_SSE_UNOP(Cvtsd2ss);
1476 break;
1477 case kSSEFloat64ToInt32:
1478 if (instr->InputAt(0)->IsFPRegister()) {
1479 __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
1480 } else {
1481 __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
1482 }
1483 break;
1484 case kSSEFloat64ToUint32: {
1485 if (instr->InputAt(0)->IsFPRegister()) {
1486 __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1487 } else {
1488 __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
1489 }
1490 if (MiscField::decode(instr->opcode())) {
1491 __ AssertZeroExtended(i.OutputRegister());
1492 }
1493 break;
1494 }
1495 case kSSEFloat32ToInt64:
1496 if (instr->InputAt(0)->IsFPRegister()) {
1497 __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1498 } else {
1499 __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1500 }
1501 if (instr->OutputCount() > 1) {
1502 __ Set(i.OutputRegister(1), 1);
1503 Label done;
1504 Label fail;
1505 __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
1506 if (instr->InputAt(0)->IsFPRegister()) {
1507 __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
1508 } else {
1509 __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
1510 }
1511 // If the input is NaN, then the conversion fails.
1512 __ j(parity_even, &fail);
1513 // If the input is INT64_MIN, then the conversion succeeds.
1514 __ j(equal, &done);
1515 __ cmpq(i.OutputRegister(0), Immediate(1));
1516 // If the conversion results in INT64_MIN, but the input was not
1517 // INT64_MIN, then the conversion fails.
1518 __ j(no_overflow, &done);
1519 __ bind(&fail);
1520 __ Set(i.OutputRegister(1), 0);
1521 __ bind(&done);
1522 }
1523 break;
1524 case kSSEFloat64ToInt64:
1525 if (instr->InputAt(0)->IsFPRegister()) {
1526 __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
1527 } else {
1528 __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
1529 }
1530 if (instr->OutputCount() > 1) {
1531 __ Set(i.OutputRegister(1), 1);
1532 Label done;
1533 Label fail;
1534 __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
1535 if (instr->InputAt(0)->IsFPRegister()) {
1536 __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
1537 } else {
1538 __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
1539 }
1540 // If the input is NaN, then the conversion fails.
1541 __ j(parity_even, &fail);
1542 // If the input is INT64_MIN, then the conversion succeeds.
1543 __ j(equal, &done);
1544 __ cmpq(i.OutputRegister(0), Immediate(1));
1545 // If the conversion results in INT64_MIN, but the input was not
1546 // INT64_MIN, then the conversion fails.
1547 __ j(no_overflow, &done);
1548 __ bind(&fail);
1549 __ Set(i.OutputRegister(1), 0);
1550 __ bind(&done);
1551 }
1552 break;
1553 case kSSEFloat32ToUint64: {
1554 Label fail;
1555 if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1556 if (instr->InputAt(0)->IsFPRegister()) {
1557 __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1558 } else {
1559 __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1560 }
1561 if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1562 __ bind(&fail);
1563 break;
1564 }
1565 case kSSEFloat64ToUint64: {
1566 Label fail;
1567 if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1568 if (instr->InputAt(0)->IsFPRegister()) {
1569 __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1570 } else {
1571 __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1572 }
1573 if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1574 __ bind(&fail);
1575 break;
1576 }
1577 case kSSEInt32ToFloat64:
1578 if (instr->InputAt(0)->IsRegister()) {
1579 __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1580 } else {
1581 __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1582 }
1583 break;
1584 case kSSEInt32ToFloat32:
1585 if (instr->InputAt(0)->IsRegister()) {
1586 __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1587 } else {
1588 __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1589 }
1590 break;
1591 case kSSEInt64ToFloat32:
1592 if (instr->InputAt(0)->IsRegister()) {
1593 __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1594 } else {
1595 __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1596 }
1597 break;
1598 case kSSEInt64ToFloat64:
1599 if (instr->InputAt(0)->IsRegister()) {
1600 __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1601 } else {
1602 __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1603 }
1604 break;
1605 case kSSEUint64ToFloat32:
1606 if (instr->InputAt(0)->IsRegister()) {
1607 __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1608 } else {
1609 __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1610 }
1611 break;
1612 case kSSEUint64ToFloat64:
1613 if (instr->InputAt(0)->IsRegister()) {
1614 __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1615 } else {
1616 __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1617 }
1618 break;
1619 case kSSEUint32ToFloat64:
1620 if (instr->InputAt(0)->IsRegister()) {
1621 __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1622 } else {
1623 __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1624 }
1625 break;
1626 case kSSEUint32ToFloat32:
1627 if (instr->InputAt(0)->IsRegister()) {
1628 __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1629 } else {
1630 __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1631 }
1632 break;
1633 case kSSEFloat64ExtractLowWord32:
1634 if (instr->InputAt(0)->IsFPStackSlot()) {
1635 __ movl(i.OutputRegister(), i.InputOperand(0));
1636 } else {
1637 __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
1638 }
1639 break;
1640 case kSSEFloat64ExtractHighWord32:
1641 if (instr->InputAt(0)->IsFPStackSlot()) {
1642 __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
1643 } else {
1644 __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
1645 }
1646 break;
1647 case kSSEFloat64InsertLowWord32:
1648 if (instr->InputAt(1)->IsRegister()) {
1649 __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
1650 } else {
1651 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
1652 }
1653 break;
1654 case kSSEFloat64InsertHighWord32:
1655 if (instr->InputAt(1)->IsRegister()) {
1656 __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
1657 } else {
1658 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
1659 }
1660 break;
1661 case kSSEFloat64LoadLowWord32:
1662 if (instr->InputAt(0)->IsRegister()) {
1663 __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
1664 } else {
1665 __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
1666 }
1667 break;
1668 case kAVXFloat32Cmp: {
1669 CpuFeatureScope avx_scope(tasm(), AVX);
1670 if (instr->InputAt(1)->IsFPRegister()) {
1671 __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1672 } else {
1673 __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1674 }
1675 break;
1676 }
1677 case kAVXFloat32Add:
1678 ASSEMBLE_AVX_BINOP(vaddss);
1679 break;
1680 case kAVXFloat32Sub:
1681 ASSEMBLE_AVX_BINOP(vsubss);
1682 break;
1683 case kAVXFloat32Mul:
1684 ASSEMBLE_AVX_BINOP(vmulss);
1685 break;
1686 case kAVXFloat32Div:
1687 ASSEMBLE_AVX_BINOP(vdivss);
1688 // Don't delete this mov. It may improve performance on some CPUs,
1689 // when there is a (v)mulss depending on the result.
1690 __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1691 break;
1692 case kAVXFloat64Cmp: {
1693 CpuFeatureScope avx_scope(tasm(), AVX);
1694 if (instr->InputAt(1)->IsFPRegister()) {
1695 __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1696 } else {
1697 __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1698 }
1699 break;
1700 }
1701 case kAVXFloat64Add:
1702 ASSEMBLE_AVX_BINOP(vaddsd);
1703 break;
1704 case kAVXFloat64Sub:
1705 ASSEMBLE_AVX_BINOP(vsubsd);
1706 break;
1707 case kAVXFloat64Mul:
1708 ASSEMBLE_AVX_BINOP(vmulsd);
1709 break;
1710 case kAVXFloat64Div:
1711 ASSEMBLE_AVX_BINOP(vdivsd);
1712 // Don't delete this mov. It may improve performance on some CPUs,
1713 // when there is a (v)mulsd depending on the result.
1714 __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1715 break;
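// The AVX abs/neg cases below synthesize their bit masks in the scratch
// register instead of loading a constant: pcmpeqd produces all ones, and a
// 64-bit shift then isolates the relevant bits (e.g. psrlq by 33 yields
// 0x7FFFFFFF per lane for float32 abs, psllq by 31 yields 0x80000000 for
// float32 neg), which is then and-ed/xor-ed with the input.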
1716 case kAVXFloat32Abs: {
1717 // TODO(bmeurer): Use RIP relative 128-bit constants.
1718 CpuFeatureScope avx_scope(tasm(), AVX);
1719 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1720 __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 33);
1721 if (instr->InputAt(0)->IsFPRegister()) {
1722 __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
1723 i.InputDoubleRegister(0));
1724 } else {
1725 __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
1726 i.InputOperand(0));
1727 }
1728 break;
1729 }
1730 case kAVXFloat32Neg: {
1731 // TODO(bmeurer): Use RIP relative 128-bit constants.
1732 CpuFeatureScope avx_scope(tasm(), AVX);
1733 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1734 __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 31);
1735 if (instr->InputAt(0)->IsFPRegister()) {
1736 __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
1737 i.InputDoubleRegister(0));
1738 } else {
1739 __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
1740 i.InputOperand(0));
1741 }
1742 break;
1743 }
1744 case kAVXFloat64Abs: {
1745 // TODO(bmeurer): Use RIP relative 128-bit constants.
1746 CpuFeatureScope avx_scope(tasm(), AVX);
1747 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1748 __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 1);
1749 if (instr->InputAt(0)->IsFPRegister()) {
1750 __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1751 i.InputDoubleRegister(0));
1752 } else {
1753 __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1754 i.InputOperand(0));
1755 }
1756 break;
1757 }
1758 case kAVXFloat64Neg: {
1759 // TODO(bmeurer): Use RIP relative 128-bit constants.
1760 CpuFeatureScope avx_scope(tasm(), AVX);
1761 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1762 __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 63);
1763 if (instr->InputAt(0)->IsFPRegister()) {
1764 __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1765 i.InputDoubleRegister(0));
1766 } else {
1767 __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1768 i.InputOperand(0));
1769 }
1770 break;
1771 }
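// Subtracting 0.0 turns a signalling NaN into a quiet NaN and leaves all
// other values (including -0.0) unchanged.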
1772 case kSSEFloat64SilenceNaN:
1773 __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
1774 __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
1775 break;
1776 case kX64Movsxbl:
1777 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1778 ASSEMBLE_MOVX(movsxbl);
1779 __ AssertZeroExtended(i.OutputRegister());
1780 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1781 break;
1782 case kX64Movzxbl:
1783 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1784 ASSEMBLE_MOVX(movzxbl);
1785 __ AssertZeroExtended(i.OutputRegister());
1786 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1787 break;
1788 case kX64Movsxbq:
1789 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1790 ASSEMBLE_MOVX(movsxbq);
1791 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1792 break;
1793 case kX64Movzxbq:
1794 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1795 ASSEMBLE_MOVX(movzxbq);
1796 __ AssertZeroExtended(i.OutputRegister());
1797 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1798 break;
1799 case kX64Movb: {
1800 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1801 size_t index = 0;
1802 Operand operand = i.MemoryOperand(&index);
1803 if (HasImmediateInput(instr, index)) {
1804 __ movb(operand, Immediate(i.InputInt8(index)));
1805 } else {
1806 __ movb(operand, i.InputRegister(index));
1807 }
1808 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1809 break;
1810 }
1811 case kX64Movsxwl:
1812 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1813 ASSEMBLE_MOVX(movsxwl);
1814 __ AssertZeroExtended(i.OutputRegister());
1815 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1816 break;
1817 case kX64Movzxwl:
1818 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1819 ASSEMBLE_MOVX(movzxwl);
1820 __ AssertZeroExtended(i.OutputRegister());
1821 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1822 break;
1823 case kX64Movsxwq:
1824 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1825 ASSEMBLE_MOVX(movsxwq);
1826 break;
1827 case kX64Movzxwq:
1828 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1829 ASSEMBLE_MOVX(movzxwq);
1830 __ AssertZeroExtended(i.OutputRegister());
1831 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1832 break;
1833 case kX64Movw: {
1834 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1835 size_t index = 0;
1836 Operand operand = i.MemoryOperand(&index);
1837 if (HasImmediateInput(instr, index)) {
1838 __ movw(operand, Immediate(i.InputInt16(index)));
1839 } else {
1840 __ movw(operand, i.InputRegister(index));
1841 }
1842 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1843 break;
1844 }
1845 case kX64Movl:
1846 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1847 if (instr->HasOutput()) {
1848 if (instr->addressing_mode() == kMode_None) {
1849 if (instr->InputAt(0)->IsRegister()) {
1850 __ movl(i.OutputRegister(), i.InputRegister(0));
1851 } else {
1852 __ movl(i.OutputRegister(), i.InputOperand(0));
1853 }
1854 } else {
1855 __ movl(i.OutputRegister(), i.MemoryOperand());
1856 }
1857 __ AssertZeroExtended(i.OutputRegister());
1858 } else {
1859 size_t index = 0;
1860 Operand operand = i.MemoryOperand(&index);
1861 if (HasImmediateInput(instr, index)) {
1862 __ movl(operand, i.InputImmediate(index));
1863 } else {
1864 __ movl(operand, i.InputRegister(index));
1865 }
1866 }
1867 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1868 break;
1869 case kX64Movsxlq:
1870 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1871 ASSEMBLE_MOVX(movsxlq);
1872 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1873 break;
1874 case kX64Movq:
1875 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1876 if (instr->HasOutput()) {
1877 __ movq(i.OutputRegister(), i.MemoryOperand());
1878 } else {
1879 size_t index = 0;
1880 Operand operand = i.MemoryOperand(&index);
1881 if (HasImmediateInput(instr, index)) {
1882 __ movq(operand, i.InputImmediate(index));
1883 } else {
1884 __ movq(operand, i.InputRegister(index));
1885 }
1886 }
1887 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1888 break;
1889 case kX64Movss:
1890 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1891 if (instr->HasOutput()) {
1892 __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
1893 } else {
1894 size_t index = 0;
1895 Operand operand = i.MemoryOperand(&index);
1896 __ movss(operand, i.InputDoubleRegister(index));
1897 }
1898 break;
1899 case kX64Movsd: {
1900 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1901 if (instr->HasOutput()) {
1902 const MemoryAccessMode access_mode =
1903 static_cast<MemoryAccessMode>(MiscField::decode(opcode));
1904 if (access_mode == kMemoryAccessPoisoned) {
1905 // If we have to poison the loaded value, we load into a general
1906 // purpose register first, mask it with the poison, and move the
1907 // value from the general purpose register into the double register.
1908 __ movq(kScratchRegister, i.MemoryOperand());
1909 __ andq(kScratchRegister, kSpeculationPoisonRegister);
1910 __ Movq(i.OutputDoubleRegister(), kScratchRegister);
1911 } else {
1912 __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
1913 }
1914 } else {
1915 size_t index = 0;
1916 Operand operand = i.MemoryOperand(&index);
1917 __ Movsd(operand, i.InputDoubleRegister(index));
1918 }
1919 break;
1920 }
1921 case kX64Movdqu: {
1922 CpuFeatureScope sse_scope(tasm(), SSSE3);
1923 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1924 if (instr->HasOutput()) {
1925 __ movdqu(i.OutputSimd128Register(), i.MemoryOperand());
1926 } else {
1927 size_t index = 0;
1928 Operand operand = i.MemoryOperand(&index);
1929 __ movdqu(operand, i.InputSimd128Register(index));
1930 }
1931 break;
1932 }
1933 case kX64BitcastFI:
1934 if (instr->InputAt(0)->IsFPStackSlot()) {
1935 __ movl(i.OutputRegister(), i.InputOperand(0));
1936 } else {
1937 __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
1938 }
1939 break;
1940 case kX64BitcastDL:
1941 if (instr->InputAt(0)->IsFPStackSlot()) {
1942 __ movq(i.OutputRegister(), i.InputOperand(0));
1943 } else {
1944 __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
1945 }
1946 break;
1947 case kX64BitcastIF:
1948 if (instr->InputAt(0)->IsRegister()) {
1949 __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
1950 } else {
1951 __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
1952 }
1953 break;
1954 case kX64BitcastLD:
1955 if (instr->InputAt(0)->IsRegister()) {
1956 __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
1957 } else {
1958 __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
1959 }
1960 break;
1961 case kX64Lea32: {
1962 AddressingMode mode = AddressingModeField::decode(instr->opcode());
1963 // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
1964 // and addressing mode just happens to work out. The "addl"/"subl" forms
1965 // in these cases are faster based on measurements.
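// For example, leal rax, [rax + rcx] is emitted as addl rax, rcx, and
// leal rax, [rax + rax] degenerates into shll rax, 1.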
1966 if (i.InputRegister(0) == i.OutputRegister()) {
1967 if (mode == kMode_MRI) {
1968 int32_t constant_summand = i.InputInt32(1);
1969 DCHECK_NE(0, constant_summand);
1970 if (constant_summand > 0) {
1971 __ addl(i.OutputRegister(), Immediate(constant_summand));
1972 } else {
1973 __ subl(i.OutputRegister(), Immediate(-constant_summand));
1974 }
1975 } else if (mode == kMode_MR1) {
1976 if (i.InputRegister(1) == i.OutputRegister()) {
1977 __ shll(i.OutputRegister(), Immediate(1));
1978 } else {
1979 __ addl(i.OutputRegister(), i.InputRegister(1));
1980 }
1981 } else if (mode == kMode_M2) {
1982 __ shll(i.OutputRegister(), Immediate(1));
1983 } else if (mode == kMode_M4) {
1984 __ shll(i.OutputRegister(), Immediate(2));
1985 } else if (mode == kMode_M8) {
1986 __ shll(i.OutputRegister(), Immediate(3));
1987 } else {
1988 __ leal(i.OutputRegister(), i.MemoryOperand());
1989 }
1990 } else if (mode == kMode_MR1 &&
1991 i.InputRegister(1) == i.OutputRegister()) {
1992 __ addl(i.OutputRegister(), i.InputRegister(0));
1993 } else {
1994 __ leal(i.OutputRegister(), i.MemoryOperand());
1995 }
1996 __ AssertZeroExtended(i.OutputRegister());
1997 break;
1998 }
1999 case kX64Lea: {
2000 AddressingMode mode = AddressingModeField::decode(instr->opcode());
2001 // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
2002 // and addressing mode just happens to work out. The "addq"/"subq" forms
2003 // in these cases are faster based on measurements.
2004 if (i.InputRegister(0) == i.OutputRegister()) {
2005 if (mode == kMode_MRI) {
2006 int32_t constant_summand = i.InputInt32(1);
2007 if (constant_summand > 0) {
2008 __ addq(i.OutputRegister(), Immediate(constant_summand));
2009 } else if (constant_summand < 0) {
2010 __ subq(i.OutputRegister(), Immediate(-constant_summand));
2011 }
2012 } else if (mode == kMode_MR1) {
2013 if (i.InputRegister(1) == i.OutputRegister()) {
2014 __ shlq(i.OutputRegister(), Immediate(1));
2015 } else {
2016 __ addq(i.OutputRegister(), i.InputRegister(1));
2017 }
2018 } else if (mode == kMode_M2) {
2019 __ shlq(i.OutputRegister(), Immediate(1));
2020 } else if (mode == kMode_M4) {
2021 __ shlq(i.OutputRegister(), Immediate(2));
2022 } else if (mode == kMode_M8) {
2023 __ shlq(i.OutputRegister(), Immediate(3));
2024 } else {
2025 __ leaq(i.OutputRegister(), i.MemoryOperand());
2026 }
2027 } else if (mode == kMode_MR1 &&
2028 i.InputRegister(1) == i.OutputRegister()) {
2029 __ addq(i.OutputRegister(), i.InputRegister(0));
2030 } else {
2031 __ leaq(i.OutputRegister(), i.MemoryOperand());
2032 }
2033 break;
2034 }
2035 case kX64Dec32:
2036 __ decl(i.OutputRegister());
2037 break;
2038 case kX64Inc32:
2039 __ incl(i.OutputRegister());
2040 break;
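// pushq only takes GP registers, immediates and memory operands, so FP and
// SIMD values are "pushed" by growing the stack explicitly and storing with
// Movsd/Movups; the SP-delta and unwinding-info updates keep the frame
// state consistent on every path.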
2041 case kX64Push:
2042 if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
2043 size_t index = 0;
2044 Operand operand = i.MemoryOperand(&index);
2045 __ pushq(operand);
2046 frame_access_state()->IncreaseSPDelta(1);
2047 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2048 kPointerSize);
2049 } else if (HasImmediateInput(instr, 0)) {
2050 __ pushq(i.InputImmediate(0));
2051 frame_access_state()->IncreaseSPDelta(1);
2052 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2053 kPointerSize);
2054 } else if (instr->InputAt(0)->IsRegister()) {
2055 __ pushq(i.InputRegister(0));
2056 frame_access_state()->IncreaseSPDelta(1);
2057 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2058 kPointerSize);
2059 } else if (instr->InputAt(0)->IsFloatRegister() ||
2060 instr->InputAt(0)->IsDoubleRegister()) {
2061 // TODO(titzer): use another machine instruction?
2062 __ subq(rsp, Immediate(kDoubleSize));
2063 frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
2064 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2065 kDoubleSize);
2066 __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
2067 } else if (instr->InputAt(0)->IsSimd128Register()) {
2068 // TODO(titzer): use another machine instruction?
2069 __ subq(rsp, Immediate(kSimd128Size));
2070 frame_access_state()->IncreaseSPDelta(kSimd128Size / kPointerSize);
2071 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2072 kSimd128Size);
2073 __ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
2074 } else if (instr->InputAt(0)->IsStackSlot() ||
2075 instr->InputAt(0)->IsFloatStackSlot() ||
2076 instr->InputAt(0)->IsDoubleStackSlot()) {
2077 __ pushq(i.InputOperand(0));
2078 frame_access_state()->IncreaseSPDelta(1);
2079 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2080 kPointerSize);
2081 } else {
2082 DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
2083 __ Movups(kScratchDoubleReg, i.InputOperand(0));
2084 // TODO(titzer): use another machine instruction?
2085 __ subq(rsp, Immediate(kSimd128Size));
2086 frame_access_state()->IncreaseSPDelta(kSimd128Size / kPointerSize);
2087 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2088 kSimd128Size);
2089 __ Movups(Operand(rsp, 0), kScratchDoubleReg);
2090 }
2091 break;
2092 case kX64Poke: {
2093 int slot = MiscField::decode(instr->opcode());
2094 if (HasImmediateInput(instr, 0)) {
2095 __ movq(Operand(rsp, slot * kPointerSize), i.InputImmediate(0));
2096 } else {
2097 __ movq(Operand(rsp, slot * kPointerSize), i.InputRegister(0));
2098 }
2099 break;
2100 }
2101 case kX64Peek: {
2102 int reverse_slot = i.InputInt32(0);
2103 int offset =
2104 FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
2105 if (instr->OutputAt(0)->IsFPRegister()) {
2106 LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
2107 if (op->representation() == MachineRepresentation::kFloat64) {
2108 __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
2109 } else {
2110 DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
2111 __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
2112 }
2113 } else {
2114 __ movq(i.OutputRegister(), Operand(rbp, offset));
2115 }
2116 break;
2117 }
2118 // TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
2119 case kX64F32x4Splat: {
2120 XMMRegister dst = i.OutputSimd128Register();
2121 if (instr->InputAt(0)->IsFPRegister()) {
2122 __ movss(dst, i.InputDoubleRegister(0));
2123 } else {
2124 __ movss(dst, i.InputOperand(0));
2125 }
2126 __ shufps(dst, dst, 0x0);
2127 break;
2128 }
2129 case kX64F32x4ExtractLane: {
2130 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2131 __ extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
2132 __ movd(i.OutputDoubleRegister(), kScratchRegister);
2133 break;
2134 }
2135 case kX64F32x4ReplaceLane: {
2136 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2137 // The insertps instruction uses imm8[5:4] to indicate the lane
2138 // that needs to be replaced.
2139 byte select = i.InputInt8(1) << 4 & 0x30;
2140 __ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2), select);
2141 break;
2142 }
2143 case kX64F32x4Abs: {
2144 XMMRegister dst = i.OutputSimd128Register();
2145 XMMRegister src = i.InputSimd128Register(0);
2146 if (dst == src) {
2147 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2148 __ psrld(kScratchDoubleReg, 1);
2149 __ andps(i.OutputSimd128Register(), kScratchDoubleReg);
2150 } else {
2151 __ pcmpeqd(dst, dst);
2152 __ psrld(dst, 1);
2153 __ andps(dst, i.InputSimd128Register(0));
2154 }
2155 break;
2156 }
2157 case kX64F32x4Neg: {
2158 XMMRegister dst = i.OutputSimd128Register();
2159 XMMRegister src = i.InputSimd128Register(0);
2160 if (dst == src) {
2161 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2162 __ pslld(kScratchDoubleReg, 31);
2163 __ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
2164 } else {
2165 __ pcmpeqd(dst, dst);
2166 __ pslld(dst, 31);
2167 __ xorps(dst, i.InputSimd128Register(0));
2168 }
2169 break;
2170 }
2171 case kX64F32x4RecipApprox: {
2172 __ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2173 break;
2174 }
2175 case kX64F32x4RecipSqrtApprox: {
2176 __ rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2177 break;
2178 }
2179 case kX64F32x4Add: {
2180 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2181 __ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2182 break;
2183 }
2184 case kX64F32x4AddHoriz: {
2185 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2186 CpuFeatureScope sse_scope(tasm(), SSE3);
2187 __ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2188 break;
2189 }
2190 case kX64F32x4Sub: {
2191 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2192 __ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2193 break;
2194 }
2195 case kX64F32x4Mul: {
2196 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2197 __ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2198 break;
2199 }
2200 case kX64F32x4Min: {
2201 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2202 __ minps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2203 break;
2204 }
2205 case kX64F32x4Max: {
2206 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2207 __ maxps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2208 break;
2209 }
2210 case kX64F32x4Eq: {
2211 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2212 __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x0);
2213 break;
2214 }
2215 case kX64F32x4Ne: {
2216 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2217 __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x4);
2218 break;
2219 }
2220 case kX64F32x4Lt: {
2221 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2222 __ cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2223 break;
2224 }
2225 case kX64F32x4Le: {
2226 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2227 __ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2228 break;
2229 }
2230 case kX64I32x4Splat: {
2231 XMMRegister dst = i.OutputSimd128Register();
2232 __ movd(dst, i.InputRegister(0));
2233 __ pshufd(dst, dst, 0x0);
2234 break;
2235 }
2236 case kX64I32x4ExtractLane: {
2237 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2238 __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
2239 break;
2240 }
2241 case kX64I32x4ReplaceLane: {
2242 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2243 if (instr->InputAt(2)->IsRegister()) {
2244 __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2),
2245 i.InputInt8(1));
2246 } else {
2247 __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2248 }
2249 break;
2250 }
2251 case kX64I32x4Neg: {
2252 CpuFeatureScope sse_scope(tasm(), SSSE3);
2253 XMMRegister dst = i.OutputSimd128Register();
2254 XMMRegister src = i.InputSimd128Register(0);
2255 if (dst == src) {
2256 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2257 __ psignd(dst, kScratchDoubleReg);
2258 } else {
2259 __ pxor(dst, dst);
2260 __ psubd(dst, src);
2261 }
2262 break;
2263 }
2264 case kX64I32x4Shl: {
2265 __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
2266 break;
2267 }
2268 case kX64I32x4ShrS: {
2269 __ psrad(i.OutputSimd128Register(), i.InputInt8(1));
2270 break;
2271 }
2272 case kX64I32x4Add: {
2273 __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2274 break;
2275 }
2276 case kX64I32x4AddHoriz: {
2277 CpuFeatureScope sse_scope(tasm(), SSSE3);
2278 __ phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2279 break;
2280 }
2281 case kX64I32x4Sub: {
2282 __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2283 break;
2284 }
2285 case kX64I32x4Mul: {
2286 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2287 __ pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
2288 break;
2289 }
2290 case kX64I32x4MinS: {
2291 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2292 __ pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2293 break;
2294 }
2295 case kX64I32x4MaxS: {
2296 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2297 __ pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2298 break;
2299 }
2300 case kX64I32x4Eq: {
2301 __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2302 break;
2303 }
2304 case kX64I32x4Ne: {
2305 __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2306 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2307 __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2308 break;
2309 }
2310 case kX64I32x4GtS: {
2311 __ pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2312 break;
2313 }
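// There is no packed signed >= compare, so x >= y is computed below as
// min(x, y) == y.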
2314 case kX64I32x4GeS: {
2315 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2316 XMMRegister dst = i.OutputSimd128Register();
2317 XMMRegister src = i.InputSimd128Register(1);
2318 __ pminsd(dst, src);
2319 __ pcmpeqd(dst, src);
2320 break;
2321 }
2322 case kX64I32x4ShrU: {
2323 __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
2324 break;
2325 }
2326 case kX64I32x4MinU: {
2327 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2328 __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
2329 break;
2330 }
2331 case kX64I32x4MaxU: {
2332 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2333 __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
2334 break;
2335 }
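// SSE has no packed unsigned compares, so x > y is computed as
// !(max(x, y) == y) and x >= y as min(x, y) == y; the same trick is used
// for the 16x8 and 8x16 variants further down.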
2336 case kX64I32x4GtU: {
2337 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2338 XMMRegister dst = i.OutputSimd128Register();
2339 XMMRegister src = i.InputSimd128Register(1);
2340 __ pmaxud(dst, src);
2341 __ pcmpeqd(dst, src);
2342 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2343 __ pxor(dst, kScratchDoubleReg);
2344 break;
2345 }
2346 case kX64I32x4GeU: {
2347 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2348 XMMRegister dst = i.OutputSimd128Register();
2349 XMMRegister src = i.InputSimd128Register(1);
2350 __ pminud(dst, src);
2351 __ pcmpeqd(dst, src);
2352 break;
2353 }
2354 case kX64S128Zero: {
2355 XMMRegister dst = i.OutputSimd128Register();
2356 __ xorps(dst, dst);
2357 break;
2358 }
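// pshuflw 0x0 replicates word 0 across the low four words, and pshufd 0x0
// then replicates that doubleword across the whole register.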
2359 case kX64I16x8Splat: {
2360 XMMRegister dst = i.OutputSimd128Register();
2361 __ movd(dst, i.InputRegister(0));
2362 __ pshuflw(dst, dst, 0x0);
2363 __ pshufd(dst, dst, 0x0);
2364 break;
2365 }
2366 case kX64I16x8ExtractLane: {
2367 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2368 Register dst = i.OutputRegister();
2369 __ pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
2370 __ movsxwl(dst, dst);
2371 break;
2372 }
2373 case kX64I16x8ReplaceLane: {
2374 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2375 if (instr->InputAt(2)->IsRegister()) {
2376 __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
2377 i.InputInt8(1));
2378 } else {
2379 __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2380 }
2381 break;
2382 }
2383 case kX64I16x8Neg: {
2384 CpuFeatureScope sse_scope(tasm(), SSSE3);
2385 XMMRegister dst = i.OutputSimd128Register();
2386 XMMRegister src = i.InputSimd128Register(0);
2387 if (dst == src) {
2388 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2389 __ psignw(dst, kScratchDoubleReg);
2390 } else {
2391 __ pxor(dst, dst);
2392 __ psubw(dst, src);
2393 }
2394 break;
2395 }
2396 case kX64I16x8Shl: {
2397 __ psllw(i.OutputSimd128Register(), i.InputInt8(1));
2398 break;
2399 }
2400 case kX64I16x8ShrS: {
2401 __ psraw(i.OutputSimd128Register(), i.InputInt8(1));
2402 break;
2403 }
2404 case kX64I16x8Add: {
2405 __ paddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2406 break;
2407 }
2408 case kX64I16x8AddSaturateS: {
2409 __ paddsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2410 break;
2411 }
2412 case kX64I16x8AddHoriz: {
2413 CpuFeatureScope sse_scope(tasm(), SSSE3);
2414 __ phaddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2415 break;
2416 }
2417 case kX64I16x8Sub: {
2418 __ psubw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2419 break;
2420 }
2421 case kX64I16x8SubSaturateS: {
2422 __ psubsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2423 break;
2424 }
2425 case kX64I16x8Mul: {
2426 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2427 __ pmullw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2428 break;
2429 }
2430 case kX64I16x8MinS: {
2431 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2432 __ pminsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2433 break;
2434 }
2435 case kX64I16x8MaxS: {
2436 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2437 __ pmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2438 break;
2439 }
2440 case kX64I16x8Eq: {
2441 __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2442 break;
2443 }
2444 case kX64I16x8Ne: {
2445 __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2446 __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2447 __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2448 break;
2449 }
2450 case kX64I16x8GtS: {
2451 __ pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2452 break;
2453 }
2454 case kX64I16x8GeS: {
2455 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2456 XMMRegister dst = i.OutputSimd128Register();
2457 XMMRegister src = i.InputSimd128Register(1);
2458 __ pminsw(dst, src);
2459 __ pcmpeqw(dst, src);
2460 break;
2461 }
2462 case kX64I16x8ShrU: {
2463 __ psrlw(i.OutputSimd128Register(), i.InputInt8(1));
2464 break;
2465 }
2466 case kX64I16x8AddSaturateU: {
2467 __ paddusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2468 break;
2469 }
2470 case kX64I16x8SubSaturateU: {
2471 __ psubusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2472 break;
2473 }
2474 case kX64I16x8MinU: {
2475 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2476 __ pminuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2477 break;
2478 }
2479 case kX64I16x8MaxU: {
2480 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2481 __ pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2482 break;
2483 }
2484 case kX64I16x8GtU: {
2485 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2486 XMMRegister dst = i.OutputSimd128Register();
2487 XMMRegister src = i.InputSimd128Register(1);
2488 __ pmaxuw(dst, src);
2489 __ pcmpeqw(dst, src);
2490 __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2491 __ pxor(dst, kScratchDoubleReg);
2492 break;
2493 }
2494 case kX64I16x8GeU: {
2495 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2496 XMMRegister dst = i.OutputSimd128Register();
2497 XMMRegister src = i.InputSimd128Register(1);
2498 __ pminuw(dst, src);
2499 __ pcmpeqw(dst, src);
2500 break;
2501 }
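// movd places the byte in lane 0; pshufb with an all-zero shuffle mask then
// broadcasts byte 0 into all 16 lanes.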
2502 case kX64I8x16Splat: {
2503 CpuFeatureScope sse_scope(tasm(), SSSE3);
2504 XMMRegister dst = i.OutputSimd128Register();
2505 __ movd(dst, i.InputRegister(0));
2506 __ xorps(kScratchDoubleReg, kScratchDoubleReg);
2507 __ pshufb(dst, kScratchDoubleReg);
2508 break;
2509 }
2510 case kX64I8x16ExtractLane: {
2511 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2512 Register dst = i.OutputRegister();
2513 __ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
2514 __ movsxbl(dst, dst);
2515 break;
2516 }
2517 case kX64I8x16ReplaceLane: {
2518 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2519 if (instr->InputAt(2)->IsRegister()) {
2520 __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
2521 i.InputInt8(1));
2522 } else {
2523 __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2524 }
2525 break;
2526 }
2527 case kX64I8x16Neg: {
2528 CpuFeatureScope sse_scope(tasm(), SSSE3);
2529 XMMRegister dst = i.OutputSimd128Register();
2530 XMMRegister src = i.InputSimd128Register(0);
2531 if (dst == src) {
2532 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2533 __ psignb(dst, kScratchDoubleReg);
2534 } else {
2535 __ pxor(dst, dst);
2536 __ psubb(dst, src);
2537 }
2538 break;
2539 }
2540 case kX64I8x16Add: {
2541 __ paddb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2542 break;
2543 }
2544 case kX64I8x16AddSaturateS: {
2545 __ paddsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2546 break;
2547 }
2548 case kX64I8x16Sub: {
2549 __ psubb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2550 break;
2551 }
2552 case kX64I8x16SubSaturateS: {
2553 __ psubsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2554 break;
2555 }
2556 case kX64I8x16MinS: {
2557 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2558 __ pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2559 break;
2560 }
2561 case kX64I8x16MaxS: {
2562 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2563 __ pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2564 break;
2565 }
2566 case kX64I8x16Eq: {
2567 __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2568 break;
2569 }
2570 case kX64I8x16Ne: {
2571 __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2572 __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
2573 __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2574 break;
2575 }
2576 case kX64I8x16GtS: {
2577 __ pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2578 break;
2579 }
2580 case kX64I8x16GeS: {
2581 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2582 XMMRegister dst = i.OutputSimd128Register();
2583 XMMRegister src = i.InputSimd128Register(1);
2584 __ pminsb(dst, src);
2585 __ pcmpeqb(dst, src);
2586 break;
2587 }
2588 case kX64I8x16AddSaturateU: {
2589 __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2590 break;
2591 }
2592 case kX64I8x16SubSaturateU: {
2593 __ psubusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2594 break;
2595 }
2596 case kX64I8x16MinU: {
2597 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2598 __ pminub(i.OutputSimd128Register(), i.InputSimd128Register(1));
2599 break;
2600 }
2601 case kX64I8x16MaxU: {
2602 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2603 __ pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1));
2604 break;
2605 }
2606 case kX64I8x16GtU: {
2607 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2608 XMMRegister dst = i.OutputSimd128Register();
2609 XMMRegister src = i.InputSimd128Register(1);
2610 __ pmaxub(dst, src);
2611 __ pcmpeqb(dst, src);
2612 __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
2613 __ pxor(dst, kScratchDoubleReg);
2614 break;
2615 }
2616 case kX64I8x16GeU: {
2617 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2618 XMMRegister dst = i.OutputSimd128Register();
2619 XMMRegister src = i.InputSimd128Register(1);
2620 __ pminub(dst, src);
2621 __ pcmpeqb(dst, src);
2622 break;
2623 }
2624 case kX64S128And: {
2625 __ pand(i.OutputSimd128Register(), i.InputSimd128Register(1));
2626 break;
2627 }
2628 case kX64S128Or: {
2629 __ por(i.OutputSimd128Register(), i.InputSimd128Register(1));
2630 break;
2631 }
2632 case kX64S128Xor: {
2633 __ pxor(i.OutputSimd128Register(), i.InputSimd128Register(1));
2634 break;
2635 }
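// Bitwise NOT is implemented as x ^ ~0, with the all-ones constant produced
// by pcmpeqd; when dst aliases src, the input is first saved in the scratch
// register.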
2636 case kX64S128Not: {
2637 XMMRegister dst = i.OutputSimd128Register();
2638 XMMRegister src = i.InputSimd128Register(0);
2639 if (dst == src) {
2640 __ movaps(kScratchDoubleReg, dst);
2641 __ pcmpeqd(dst, dst);
2642 __ pxor(dst, kScratchDoubleReg);
2643 } else {
2644 __ pcmpeqd(dst, dst);
2645 __ pxor(dst, src);
2646 }
2647
2648 break;
2649 }
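// Bitwise select: computes (mask & (a ^ b)) ^ b, i.e. bits come from a
// (input 1) where the mask (already in dst) is set and from b (input 2)
// elsewhere.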
2650 case kX64S128Select: {
2651 // Mask used here is stored in dst.
2652 XMMRegister dst = i.OutputSimd128Register();
2653 __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
2654 __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
2655 __ andps(dst, kScratchDoubleReg);
2656 __ xorps(dst, i.InputSimd128Register(2));
2657 break;
2658 }
2659 case kX64StackCheck:
2660 __ CompareRoot(rsp, Heap::kStackLimitRootIndex);
2661 break;
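// xchg with a memory operand is implicitly locked on x86, so no lock prefix
// is needed; the movsx/movzx afterwards canonicalizes the narrow result to
// a full 32-bit value in the same register.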
2662 case kWord32AtomicExchangeInt8: {
2663 __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
2664 __ movsxbl(i.InputRegister(0), i.InputRegister(0));
2665 break;
2666 }
2667 case kWord32AtomicExchangeUint8: {
2668 __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
2669 __ movzxbl(i.InputRegister(0), i.InputRegister(0));
2670 break;
2671 }
2672 case kWord32AtomicExchangeInt16: {
2673 __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
2674 __ movsxwl(i.InputRegister(0), i.InputRegister(0));
2675 break;
2676 }
2677 case kWord32AtomicExchangeUint16: {
2678 __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
2679 __ movzxwl(i.InputRegister(0), i.InputRegister(0));
2680 break;
2681 }
2682 case kWord32AtomicExchangeWord32: {
2683 __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
2684 break;
2685 }
2686 case kWord32AtomicCompareExchangeInt8: {
2687 __ lock();
2688 __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
2689 __ movsxbl(rax, rax);
2690 break;
2691 }
2692 case kWord32AtomicCompareExchangeUint8: {
2693 __ lock();
2694 __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
2695 __ movzxbl(rax, rax);
2696 break;
2697 }
2698 case kWord32AtomicCompareExchangeInt16: {
2699 __ lock();
2700 __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
2701 __ movsxwl(rax, rax);
2702 break;
2703 }
2704 case kWord32AtomicCompareExchangeUint16: {
2705 __ lock();
2706 __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
2707 __ movzxwl(rax, rax);
2708 break;
2709 }
2710 case kWord32AtomicCompareExchangeWord32: {
2711 __ lock();
2712 __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
2713 break;
2714 }
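// ASSEMBLE_ATOMIC_BINOP (defined earlier in this file) is expected to emit a
// load / compute / lock cmpxchg retry loop that leaves the old memory value
// in rax; the movsx/movzx below then canonicalizes that value to 32 bits.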
2715 #define ATOMIC_BINOP_CASE(op, inst) \
2716 case kWord32Atomic##op##Int8: \
2717 ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
2718 __ movsxbl(rax, rax); \
2719 break; \
2720 case kWord32Atomic##op##Uint8: \
2721 ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
2722 __ movzxbl(rax, rax); \
2723 break; \
2724 case kWord32Atomic##op##Int16: \
2725 ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
2726 __ movsxwl(rax, rax); \
2727 break; \
2728 case kWord32Atomic##op##Uint16: \
2729 ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
2730 __ movzxwl(rax, rax); \
2731 break; \
2732 case kWord32Atomic##op##Word32: \
2733 ASSEMBLE_ATOMIC_BINOP(inst, movl, cmpxchgl); \
2734 break;
2735 ATOMIC_BINOP_CASE(Add, addl)
2736 ATOMIC_BINOP_CASE(Sub, subl)
2737 ATOMIC_BINOP_CASE(And, andl)
2738 ATOMIC_BINOP_CASE(Or, orl)
2739 ATOMIC_BINOP_CASE(Xor, xorl)
2740 #undef ATOMIC_BINOP_CASE
2741 case kX64Word64AtomicExchangeUint8: {
2742 __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
2743 __ movzxbq(i.InputRegister(0), i.InputRegister(0));
2744 break;
2745 }
2746 case kX64Word64AtomicExchangeUint16: {
2747 __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
2748 __ movzxwq(i.InputRegister(0), i.InputRegister(0));
2749 break;
2750 }
2751 case kX64Word64AtomicExchangeUint32: {
2752 __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
2753 break;
2754 }
2755 case kX64Word64AtomicExchangeUint64: {
2756 __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
2757 break;
2758 }
2759 case kX64Word64AtomicCompareExchangeUint8: {
2760 __ lock();
2761 __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
2762 __ movzxbq(rax, rax);
2763 break;
2764 }
2765 case kX64Word64AtomicCompareExchangeUint16: {
2766 __ lock();
2767 __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
2768 __ movzxwq(rax, rax);
2769 break;
2770 }
2771 case kX64Word64AtomicCompareExchangeUint32: {
2772 __ lock();
2773 __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
2774 break;
2775 }
2776 case kX64Word64AtomicCompareExchangeUint64: {
2777 __ lock();
2778 __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
2779 break;
2780 }
2781 #define ATOMIC64_BINOP_CASE(op, inst) \
2782 case kX64Word64Atomic##op##Uint8: \
2783 ASSEMBLE_ATOMIC64_BINOP(inst, movb, cmpxchgb); \
2784 __ movzxbq(rax, rax); \
2785 break; \
2786 case kX64Word64Atomic##op##Uint16: \
2787 ASSEMBLE_ATOMIC64_BINOP(inst, movw, cmpxchgw); \
2788 __ movzxwq(rax, rax); \
2789 break; \
2790 case kX64Word64Atomic##op##Uint32: \
2791 ASSEMBLE_ATOMIC64_BINOP(inst, movl, cmpxchgl); \
2792 break; \
2793 case kX64Word64Atomic##op##Uint64: \
2794 ASSEMBLE_ATOMIC64_BINOP(inst, movq, cmpxchgq); \
2795 break;
2796 ATOMIC64_BINOP_CASE(Add, addq)
2797 ATOMIC64_BINOP_CASE(Sub, subq)
2798 ATOMIC64_BINOP_CASE(And, andq)
2799 ATOMIC64_BINOP_CASE(Or, orq)
2800 ATOMIC64_BINOP_CASE(Xor, xorq)
2801 #undef ATOMIC64_BINOP_CASE
2802 case kWord32AtomicLoadInt8:
2803 case kWord32AtomicLoadUint8:
2804 case kWord32AtomicLoadInt16:
2805 case kWord32AtomicLoadUint16:
2806 case kWord32AtomicLoadWord32:
2807 case kWord32AtomicStoreWord8:
2808 case kWord32AtomicStoreWord16:
2809 case kWord32AtomicStoreWord32:
2810 case kX64Word64AtomicLoadUint8:
2811 case kX64Word64AtomicLoadUint16:
2812 case kX64Word64AtomicLoadUint32:
2813 case kX64Word64AtomicLoadUint64:
2814 case kX64Word64AtomicStoreWord8:
2815 case kX64Word64AtomicStoreWord16:
2816 case kX64Word64AtomicStoreWord32:
2817 case kX64Word64AtomicStoreWord64:
2818 UNREACHABLE(); // Won't be generated by instruction selector.
2819 break;
2820 }
2821 return kSuccess;
2822 }  // NOLINT(readability/fn_size)
2823
2824 #undef ASSEMBLE_UNOP
2825 #undef ASSEMBLE_BINOP
2826 #undef ASSEMBLE_COMPARE
2827 #undef ASSEMBLE_MULT
2828 #undef ASSEMBLE_SHIFT
2829 #undef ASSEMBLE_MOVX
2830 #undef ASSEMBLE_SSE_BINOP
2831 #undef ASSEMBLE_SSE_UNOP
2832 #undef ASSEMBLE_AVX_BINOP
2833 #undef ASSEMBLE_IEEE754_BINOP
2834 #undef ASSEMBLE_IEEE754_UNOP
2835 #undef ASSEMBLE_ATOMIC_BINOP
2836 #undef ASSEMBLE_ATOMIC64_BINOP
2837
2838 namespace {
2839
2840 Condition FlagsConditionToCondition(FlagsCondition condition) {
2841 switch (condition) {
2842 case kUnorderedEqual:
2843 case kEqual:
2844 return equal;
2845 case kUnorderedNotEqual:
2846 case kNotEqual:
2847 return not_equal;
2848 case kSignedLessThan:
2849 return less;
2850 case kSignedGreaterThanOrEqual:
2851 return greater_equal;
2852 case kSignedLessThanOrEqual:
2853 return less_equal;
2854 case kSignedGreaterThan:
2855 return greater;
2856 case kUnsignedLessThan:
2857 return below;
2858 case kUnsignedGreaterThanOrEqual:
2859 return above_equal;
2860 case kUnsignedLessThanOrEqual:
2861 return below_equal;
2862 case kUnsignedGreaterThan:
2863 return above;
2864 case kOverflow:
2865 return overflow;
2866 case kNotOverflow:
2867 return no_overflow;
2868 default:
2869 break;
2870 }
2871 UNREACHABLE();
2872 }
2873
2874 } // namespace
2875
2876 // Assembles branches after this instruction.
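// ucomiss/ucomisd set the parity flag for unordered (NaN) operands, which is
// why the kUnorderedEqual/kUnorderedNotEqual conditions dispatch on
// parity_even first.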
2877 void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
2878 Label::Distance flabel_distance =
2879 branch->fallthru ? Label::kNear : Label::kFar;
2880 Label* tlabel = branch->true_label;
2881 Label* flabel = branch->false_label;
2882 if (branch->condition == kUnorderedEqual) {
2883 __ j(parity_even, flabel, flabel_distance);
2884 } else if (branch->condition == kUnorderedNotEqual) {
2885 __ j(parity_even, tlabel);
2886 }
2887 __ j(FlagsConditionToCondition(branch->condition), tlabel);
2888
2889 if (!branch->fallthru) __ jmp(flabel, flabel_distance);
2890 }
2891
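// Clears kSpeculationPoisonRegister via a cmov on the negated condition, so
// that loads masked with the poison register read as zero if the CPU
// speculates past this branch in the wrong direction.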
2892 void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
2893 Instruction* instr) {
2894 // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
2895 if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
2896 return;
2897 }
2898
2899 condition = NegateFlagsCondition(condition);
2900 __ movl(kScratchRegister, Immediate(0));
2901 __ cmovq(FlagsConditionToCondition(condition), kSpeculationPoisonRegister,
2902 kScratchRegister);
2903 }
2904
2905 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
2906 BranchInfo* branch) {
2907 Label::Distance flabel_distance =
2908 branch->fallthru ? Label::kNear : Label::kFar;
2909 Label* tlabel = branch->true_label;
2910 Label* flabel = branch->false_label;
2911 Label nodeopt;
2912 if (branch->condition == kUnorderedEqual) {
2913 __ j(parity_even, flabel, flabel_distance);
2914 } else if (branch->condition == kUnorderedNotEqual) {
2915 __ j(parity_even, tlabel);
2916 }
2917 __ j(FlagsConditionToCondition(branch->condition), tlabel);
2918
2919 if (FLAG_deopt_every_n_times > 0) {
2920 ExternalReference counter =
2921 ExternalReference::stress_deopt_count(isolate());
2922
2923 __ pushfq();
2924 __ pushq(rax);
2925 __ load_rax(counter);
2926 __ decl(rax);
2927 __ j(not_zero, &nodeopt);
2928
2929 __ Set(rax, FLAG_deopt_every_n_times);
2930 __ store_rax(counter);
2931 __ popq(rax);
2932 __ popfq();
2933 __ jmp(tlabel);
2934
2935 __ bind(&nodeopt);
2936 __ store_rax(counter);
2937 __ popq(rax);
2938 __ popfq();
2939 }
2940
2941 if (!branch->fallthru) {
2942 __ jmp(flabel, flabel_distance);
2943 }
2944 }
2945
2946 void CodeGenerator::AssembleArchJump(RpoNumber target) {
2947 if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
2948 }
2949
2950 void CodeGenerator::AssembleArchTrap(Instruction* instr,
2951 FlagsCondition condition) {
2952 auto ool = new (zone()) WasmOutOfLineTrap(this, instr);
2953 Label* tlabel = ool->entry();
2954 Label end;
2955 if (condition == kUnorderedEqual) {
2956 __ j(parity_even, &end);
2957 } else if (condition == kUnorderedNotEqual) {
2958 __ j(parity_even, tlabel);
2959 }
2960 __ j(FlagsConditionToCondition(condition), tlabel);
2961 __ bind(&end);
2962 }
2963
2964 // Assembles boolean materializations after this instruction.
2965 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
2966 FlagsCondition condition) {
2967 X64OperandConverter i(this, instr);
2968 Label done;
2969
2970 // Materialize a full 64-bit 1 or 0 value. The result register is always the
2971 // last output of the instruction.
2972 Label check;
2973 DCHECK_NE(0u, instr->OutputCount());
2974 Register reg = i.OutputRegister(instr->OutputCount() - 1);
2975 if (condition == kUnorderedEqual) {
2976 __ j(parity_odd, &check, Label::kNear);
2977 __ movl(reg, Immediate(0));
2978 __ jmp(&done, Label::kNear);
2979 } else if (condition == kUnorderedNotEqual) {
2980 __ j(parity_odd, &check, Label::kNear);
2981 __ movl(reg, Immediate(1));
2982 __ jmp(&done, Label::kNear);
2983 }
2984 __ bind(&check);
2985 __ setcc(FlagsConditionToCondition(condition), reg);
2986 __ movzxbl(reg, reg);
2987 __ bind(&done);
2988 }
2989
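// Collects the (case value, target label) pairs and hands them to the shared
// AssembleArchBinarySearchSwitchRange helper, which emits a binary search
// over the cases.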
2990 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
2991 X64OperandConverter i(this, instr);
2992 Register input = i.InputRegister(0);
2993 std::vector<std::pair<int32_t, Label*>> cases;
2994 for (size_t index = 2; index < instr->InputCount(); index += 2) {
2995 cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
2996 }
2997 AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
2998 cases.data() + cases.size());
2999 }
3000
3001 void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
3002 X64OperandConverter i(this, instr);
3003 Register input = i.InputRegister(0);
3004 for (size_t index = 2; index < instr->InputCount(); index += 2) {
3005 __ cmpl(input, Immediate(i.InputInt32(index + 0)));
3006 __ j(equal, GetLabel(i.InputRpo(index + 1)));
3007 }
3008 AssembleArchJump(i.InputRpo(1));
3009 }
3010
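// Emits a bounds check against the case count (falling back to the default
// label) followed by an indirect jump through an in-code table of 8-byte
// label addresses.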
3011 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
3012 X64OperandConverter i(this, instr);
3013 Register input = i.InputRegister(0);
3014 int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
3015 Label** cases = zone()->NewArray<Label*>(case_count);
3016 for (int32_t index = 0; index < case_count; ++index) {
3017 cases[index] = GetLabel(i.InputRpo(index + 2));
3018 }
3019 Label* const table = AddJumpTable(cases, case_count);
3020 __ cmpl(input, Immediate(case_count));
3021 __ j(above_equal, GetLabel(i.InputRpo(1)));
3022 __ leaq(kScratchRegister, Operand(table));
3023 __ jmp(Operand(kScratchRegister, input, times_8, 0));
3024 }
3025
3026 namespace {
3027
3028 static const int kQuadWordSize = 16;
3029
3030 } // namespace
3031
3032 void CodeGenerator::FinishFrame(Frame* frame) {
3033 auto call_descriptor = linkage()->GetIncomingDescriptor();
3034
3035 const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3036 if (saves_fp != 0) {
3037 frame->AlignSavedCalleeRegisterSlots();
3038 if (saves_fp != 0) { // Save callee-saved XMM registers.
3039 const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
3040 frame->AllocateSavedCalleeRegisterSlots(saves_fp_count *
3041 (kQuadWordSize / kPointerSize));
3042 }
3043 }
3044 const RegList saves = call_descriptor->CalleeSavedRegisters();
3045 if (saves != 0) { // Save callee-saved registers.
3046 int count = 0;
3047 for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
3048 if (((1 << i) & saves)) {
3049 ++count;
3050 }
3051 }
3052 frame->AllocateSavedCalleeRegisterSlots(count);
3053 }
3054 }
3055
3056 void CodeGenerator::AssembleConstructFrame() {
3057 auto call_descriptor = linkage()->GetIncomingDescriptor();
3058 if (frame_access_state()->has_frame()) {
3059 int pc_base = __ pc_offset();
3060
3061 if (call_descriptor->IsCFunctionCall()) {
3062 __ pushq(rbp);
3063 __ movq(rbp, rsp);
3064 } else if (call_descriptor->IsJSFunctionCall()) {
3065 __ Prologue();
3066 if (call_descriptor->PushArgumentCount()) {
3067 __ pushq(kJavaScriptCallArgCountRegister);
3068 }
3069 } else {
3070 __ StubPrologue(info()->GetOutputStackFrameType());
3071 if (call_descriptor->IsWasmFunctionCall()) {
3072 __ pushq(kWasmInstanceRegister);
3073 }
3074 }
3075
3076 unwinding_info_writer_.MarkFrameConstructed(pc_base);
3077 }
3078 int shrink_slots = frame()->GetTotalFrameSlotCount() -
3079 call_descriptor->CalculateFixedFrameSize();
3080
3081 if (info()->is_osr()) {
3082 // TurboFan OSR-compiled functions cannot be entered directly.
3083 __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
3084
3085 // Unoptimized code jumps directly to this entrypoint while the unoptimized
3086 // frame is still on the stack. Optimized code uses OSR values directly from
3087 // the unoptimized frame. Thus, all that needs to be done is to allocate the
3088 // remaining stack slots.
3089 if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
3090 osr_pc_offset_ = __ pc_offset();
3091 shrink_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
3092 ResetSpeculationPoison();
3093 }
3094
3095 const RegList saves = call_descriptor->CalleeSavedRegisters();
3096 const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3097
3098 if (shrink_slots > 0) {
3099 DCHECK(frame_access_state()->has_frame());
3100 if (info()->IsWasm() && shrink_slots > 128) {
3101 // For WebAssembly functions with big frames we have to do the stack
3102 // overflow check before we construct the frame. Otherwise we may not
3103 // have enough space on the stack to call the runtime for the stack
3104 // overflow.
3105 Label done;
3106
3107 // If the frame is bigger than the stack, we throw the stack overflow
3108 // exception unconditionally. Thereby we can avoid the integer overflow
3109 // check in the condition code.
3110 if (shrink_slots * kPointerSize < FLAG_stack_size * 1024) {
3111 __ movq(kScratchRegister,
3112 FieldOperand(kWasmInstanceRegister,
3113 WasmInstanceObject::kRealStackLimitAddressOffset));
3114 __ movq(kScratchRegister, Operand(kScratchRegister, 0));
3115 __ addq(kScratchRegister, Immediate(shrink_slots * kPointerSize));
3116 __ cmpq(rsp, kScratchRegister);
3117 __ j(above_equal, &done);
3118 }
3119 __ movp(rcx, FieldOperand(kWasmInstanceRegister,
3120 WasmInstanceObject::kCEntryStubOffset));
3121 __ Move(rsi, Smi::kZero);
3122 __ CallRuntimeWithCEntry(Runtime::kThrowWasmStackOverflow, rcx);
3123 ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
3124 RecordSafepoint(reference_map, Safepoint::kSimple, 0,
3125 Safepoint::kNoLazyDeopt);
3126 __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
3127 __ bind(&done);
3128 }
3129
3130 // Skip callee-saved and return slots, which are created below.
3131 shrink_slots -= base::bits::CountPopulation(saves);
3132 shrink_slots -=
3133 base::bits::CountPopulation(saves_fp) * (kQuadWordSize / kPointerSize);
3134 shrink_slots -= frame()->GetReturnSlotCount();
3135 if (shrink_slots > 0) {
3136 __ subq(rsp, Immediate(shrink_slots * kPointerSize));
3137 }
3138 }
3139
3140 if (saves_fp != 0) { // Save callee-saved XMM registers.
3141 const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
3142 const int stack_size = saves_fp_count * kQuadWordSize;
3143 // Adjust the stack pointer.
3144 __ subp(rsp, Immediate(stack_size));
3145 // Store the registers on the stack.
3146 int slot_idx = 0;
3147 for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
3148 if (!((1 << i) & saves_fp)) continue;
3149 __ movdqu(Operand(rsp, kQuadWordSize * slot_idx),
3150 XMMRegister::from_code(i));
3151 slot_idx++;
3152 }
3153 }
3154
3155 if (saves != 0) { // Save callee-saved registers.
3156 for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
3157 if (!((1 << i) & saves)) continue;
3158 __ pushq(Register::from_code(i));
3159 }
3160 }
3161
3162 // Allocate return slots (located after callee-saved).
3163 if (frame()->GetReturnSlotCount() > 0) {
3164 __ subq(rsp, Immediate(frame()->GetReturnSlotCount() * kPointerSize));
3165 }
3166 }
3167
3168 void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
3169 auto call_descriptor = linkage()->GetIncomingDescriptor();
3170
3171 // Restore registers.
3172 const RegList saves = call_descriptor->CalleeSavedRegisters();
3173 if (saves != 0) {
3174 const int returns = frame()->GetReturnSlotCount();
3175 if (returns != 0) {
3176 __ addq(rsp, Immediate(returns * kPointerSize));
3177 }
3178 for (int i = 0; i < Register::kNumRegisters; i++) {
3179 if (!((1 << i) & saves)) continue;
3180 __ popq(Register::from_code(i));
3181 }
3182 }
3183 const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3184 if (saves_fp != 0) {
3185 const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
3186 const int stack_size = saves_fp_count * kQuadWordSize;
3187 // Load the registers from the stack.
3188 int slot_idx = 0;
3189 for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
3190 if (!((1 << i) & saves_fp)) continue;
3191 __ movdqu(XMMRegister::from_code(i),
3192 Operand(rsp, kQuadWordSize * slot_idx));
3193 slot_idx++;
3194 }
3195 // Adjust the stack pointer.
3196 __ addp(rsp, Immediate(stack_size));
3197 }
3198
3199 unwinding_info_writer_.MarkBlockWillExit();
3200
3201 // Might need rcx for scratch if pop_size is too big or if there is a variable
3202 // pop count.
3203 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rcx.bit());
3204 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rdx.bit());
3205 size_t pop_size = call_descriptor->StackParameterCount() * kPointerSize;
3206 X64OperandConverter g(this, nullptr);
3207 if (call_descriptor->IsCFunctionCall()) {
3208 AssembleDeconstructFrame();
3209 } else if (frame_access_state()->has_frame()) {
3210 if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
3211 // Canonicalize JSFunction return sites for now.
3212 if (return_label_.is_bound()) {
3213 __ jmp(&return_label_);
3214 return;
3215 } else {
3216 __ bind(&return_label_);
3217 AssembleDeconstructFrame();
3218 }
3219 } else {
3220 AssembleDeconstructFrame();
3221 }
3222 }
3223
3224 if (pop->IsImmediate()) {
3225 pop_size += g.ToConstant(pop).ToInt32() * kPointerSize;
3226 CHECK_LT(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
3227 __ Ret(static_cast<int>(pop_size), rcx);
3228 } else {
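// Variable pop count: pop the return address into a scratch register, drop
// pop_reg * kPointerSize (plus any fixed pop_size) bytes of stack arguments,
// then jump back to the caller.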
3229 Register pop_reg = g.ToRegister(pop);
3230 Register scratch_reg = pop_reg == rcx ? rdx : rcx;
3231 __ popq(scratch_reg);
3232 __ leaq(rsp, Operand(rsp, pop_reg, times_8, static_cast<int>(pop_size)));
3233 __ jmp(scratch_reg);
3234 }
3235 }
3236
3237 void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }
3238
3239 void CodeGenerator::AssembleMove(InstructionOperand* source,
3240 InstructionOperand* destination) {
3241 X64OperandConverter g(this, nullptr);
3242 // Helper function to write the given constant to the dst register.
3243 auto MoveConstantToRegister = [&](Register dst, Constant src) {
3244 switch (src.type()) {
3245 case Constant::kInt32: {
3246 if (RelocInfo::IsWasmPtrReference(src.rmode())) {
3247 __ movq(dst, src.ToInt64(), src.rmode());
3248 } else {
3249 int32_t value = src.ToInt32();
3250 if (value == 0) {
3251 __ xorl(dst, dst);
3252 } else {
3253 __ movl(dst, Immediate(value));
3254 }
3255 }
3256 break;
3257 }
3258 case Constant::kInt64:
3259 if (RelocInfo::IsWasmPtrReference(src.rmode())) {
3260 __ movq(dst, src.ToInt64(), src.rmode());
3261 } else {
3262 __ Set(dst, src.ToInt64());
3263 }
3264 break;
3265 case Constant::kFloat32:
3266 __ MoveNumber(dst, src.ToFloat32());
3267 break;
3268 case Constant::kFloat64:
3269 __ MoveNumber(dst, src.ToFloat64().value());
3270 break;
3271 case Constant::kExternalReference:
3272 __ Move(dst, src.ToExternalReference());
3273 break;
3274 case Constant::kHeapObject: {
3275 Handle<HeapObject> src_object = src.ToHeapObject();
3276 Heap::RootListIndex index;
3277 if (IsMaterializableFromRoot(src_object, &index)) {
3278 __ LoadRoot(dst, index);
3279 } else {
3280 __ Move(dst, src_object);
3281 }
3282 break;
3283 }
3284 case Constant::kRpoNumber:
3285 UNREACHABLE(); // TODO(dcarney): load of labels on x64.
3286 break;
3287 }
3288 };
3289 // Helper function to write the given constant to the stack.
3290 auto MoveConstantToSlot = [&](Operand dst, Constant src) {
3291 if (!RelocInfo::IsWasmPtrReference(src.rmode())) {
3292 switch (src.type()) {
3293 case Constant::kInt32:
3294 __ movq(dst, Immediate(src.ToInt32()));
3295 return;
3296 case Constant::kInt64:
3297 __ Set(dst, src.ToInt64());
3298 return;
3299 default:
3300 break;
3301 }
3302 }
3303 MoveConstantToRegister(kScratchRegister, src);
3304 __ movq(dst, kScratchRegister);
3305 };
3306 // Dispatch on the source and destination operand kinds.
  switch (MoveType::InferMove(source, destination)) {
    case MoveType::kRegisterToRegister:
      if (source->IsRegister()) {
        __ movq(g.ToRegister(destination), g.ToRegister(source));
      } else {
        DCHECK(source->IsFPRegister());
        __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
      }
      return;
    case MoveType::kRegisterToStack: {
      Operand dst = g.ToOperand(destination);
      if (source->IsRegister()) {
        __ movq(dst, g.ToRegister(source));
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
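        // 128-bit values need a full 16-byte Movups (which tolerates
        // unaligned slots); narrower FP values fit in a 64-bit Movsd.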
        if (rep != MachineRepresentation::kSimd128) {
          __ Movsd(dst, src);
        } else {
          __ Movups(dst, src);
        }
      }
      return;
    }
    case MoveType::kStackToRegister: {
      Operand src = g.ToOperand(source);
      if (source->IsStackSlot()) {
        __ movq(g.ToRegister(destination), src);
      } else {
        DCHECK(source->IsFPStackSlot());
        XMMRegister dst = g.ToDoubleRegister(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep != MachineRepresentation::kSimd128) {
          __ Movsd(dst, src);
        } else {
          __ Movups(dst, src);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      Operand src = g.ToOperand(source);
      Operand dst = g.ToOperand(destination);
      if (source->IsStackSlot()) {
        // Spill on demand to use a temporary register for memory-to-memory
        // moves.
        __ movq(kScratchRegister, src);
        __ movq(dst, kScratchRegister);
      } else {
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep != MachineRepresentation::kSimd128) {
          __ Movsd(kScratchDoubleReg, src);
          __ Movsd(dst, kScratchDoubleReg);
        } else {
          DCHECK(source->IsSimd128StackSlot());
          __ Movups(kScratchDoubleReg, src);
          __ Movups(dst, kScratchDoubleReg);
        }
      }
      return;
    }
    case MoveType::kConstantToRegister: {
      Constant src = g.ToConstant(source);
      if (destination->IsRegister()) {
        MoveConstantToRegister(g.ToRegister(destination), src);
      } else {
        DCHECK(destination->IsFPRegister());
        XMMRegister dst = g.ToDoubleRegister(destination);
        if (src.type() == Constant::kFloat32) {
          // TODO(turbofan): Can we do better here?
          __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));
        } else {
          DCHECK_EQ(src.type(), Constant::kFloat64);
          __ Move(dst, src.ToFloat64().AsUint64());
        }
      }
      return;
    }
    case MoveType::kConstantToStack: {
      Constant src = g.ToConstant(source);
      Operand dst = g.ToOperand(destination);
      if (destination->IsStackSlot()) {
        MoveConstantToSlot(dst, src);
      } else {
        DCHECK(destination->IsFPStackSlot());
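        // Store the raw bit pattern of the float: 32 bits can be written as
        // an immediate, 64 bits go through the scratch register.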
        if (src.type() == Constant::kFloat32) {
          __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
        } else {
          DCHECK_EQ(src.type(), Constant::kFloat64);
          __ movq(kScratchRegister, src.ToFloat64().AsUint64());
          __ movq(dst, kScratchRegister);
        }
      }
      return;
    }
  }
  UNREACHABLE();
}

void CodeGenerator::AssembleSwap(InstructionOperand* source,
                                 InstructionOperand* destination) {
  X64OperandConverter g(this, nullptr);
  // Dispatch on the source and destination operand kinds. Not all
  // combinations are possible.
  switch (MoveType::InferSwap(source, destination)) {
    case MoveType::kRegisterToRegister: {
      if (source->IsRegister()) {
        Register src = g.ToRegister(source);
        Register dst = g.ToRegister(destination);
        __ movq(kScratchRegister, src);
        __ movq(src, dst);
        __ movq(dst, kScratchRegister);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        XMMRegister dst = g.ToDoubleRegister(destination);
        __ Movapd(kScratchDoubleReg, src);
        __ Movapd(src, dst);
        __ Movapd(dst, kScratchDoubleReg);
      }
      return;
    }
    case MoveType::kRegisterToStack: {
      if (source->IsRegister()) {
        Register src = g.ToRegister(source);
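        // Swap the register with the stack slot via push/pop so that no extra
        // scratch register is needed. The SP delta is adjusted around the
        // push so that rsp-relative slot operands keep resolving correctly.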
        __ pushq(src);
        frame_access_state()->IncreaseSPDelta(1);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kPointerSize);
        __ movq(src, g.ToOperand(destination));
        frame_access_state()->IncreaseSPDelta(-1);
        __ popq(g.ToOperand(destination));
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kPointerSize);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        Operand dst = g.ToOperand(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep != MachineRepresentation::kSimd128) {
          __ Movsd(kScratchDoubleReg, src);
          __ Movsd(src, dst);
          __ Movsd(dst, kScratchDoubleReg);
        } else {
          __ Movups(kScratchDoubleReg, src);
          __ Movups(src, dst);
          __ Movups(dst, kScratchDoubleReg);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      Operand src = g.ToOperand(source);
      Operand dst = g.ToOperand(destination);
      MachineRepresentation rep =
          LocationOperand::cast(source)->representation();
      if (rep != MachineRepresentation::kSimd128) {
        Register tmp = kScratchRegister;
        __ movq(tmp, dst);
        __ pushq(src);  // Then use stack to copy src to destination.
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kPointerSize);
        __ popq(dst);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kPointerSize);
        __ movq(src, tmp);
      } else {
        // Without AVX, misaligned reads and writes will trap. Move using the
        // stack, in two parts.
        __ movups(kScratchDoubleReg, dst);  // Save dst in scratch register.
        __ pushq(src);  // Then use stack to copy src to destination.
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kPointerSize);
        __ popq(dst);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kPointerSize);
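        // Copy the upper eight bytes of the 16-byte slot the same way.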
        __ pushq(g.ToOperand(source, kPointerSize));
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kPointerSize);
        __ popq(g.ToOperand(destination, kPointerSize));
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kPointerSize);
        __ movups(src, kScratchDoubleReg);
      }
      return;
    }
    default:
      UNREACHABLE();
      break;
  }
}

void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
  for (size_t index = 0; index < target_count; ++index) {
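    // Each jump table entry is a pointer-sized (64-bit) label address.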
    __ dq(targets[index]);
  }
}

#undef __

}  // namespace compiler
}  // namespace internal
}  // namespace v8