1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/compiler/code-generator.h"
6 
7 #include "src/assembler-inl.h"
8 #include "src/callable.h"
9 #include "src/compiler/code-generator-impl.h"
10 #include "src/compiler/gap-resolver.h"
11 #include "src/compiler/node-matchers.h"
12 #include "src/compiler/osr.h"
13 #include "src/frame-constants.h"
14 #include "src/frames.h"
15 #include "src/heap/heap-inl.h"
16 #include "src/ia32/assembler-ia32.h"
17 #include "src/ia32/macro-assembler-ia32.h"
18 #include "src/optimized-compilation-info.h"
19 #include "src/wasm/wasm-code-manager.h"
20 #include "src/wasm/wasm-objects.h"
21 
22 namespace v8 {
23 namespace internal {
24 namespace compiler {
25 
26 #define __ tasm()->
27 
28 #define kScratchDoubleReg xmm0
29 
30 
31 // Adds IA-32 specific methods for decoding operands.
32 class IA32OperandConverter : public InstructionOperandConverter {
33  public:
IA32OperandConverter(CodeGenerator * gen,Instruction * instr)34   IA32OperandConverter(CodeGenerator* gen, Instruction* instr)
35       : InstructionOperandConverter(gen, instr) {}
36 
InputOperand(size_t index,int extra=0)37   Operand InputOperand(size_t index, int extra = 0) {
38     return ToOperand(instr_->InputAt(index), extra);
39   }
40 
InputImmediate(size_t index)41   Immediate InputImmediate(size_t index) {
42     return ToImmediate(instr_->InputAt(index));
43   }
44 
OutputOperand()45   Operand OutputOperand() { return ToOperand(instr_->Output()); }
46 
ToOperand(InstructionOperand * op,int extra=0)47   Operand ToOperand(InstructionOperand* op, int extra = 0) {
48     if (op->IsRegister()) {
49       DCHECK_EQ(0, extra);
50       return Operand(ToRegister(op));
51     } else if (op->IsFPRegister()) {
52       DCHECK_EQ(0, extra);
53       return Operand(ToDoubleRegister(op));
54     }
55     DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
56     return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
57   }
58 
SlotToOperand(int slot,int extra=0)59   Operand SlotToOperand(int slot, int extra = 0) {
60     FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
61     return Operand(offset.from_stack_pointer() ? esp : ebp,
62                    offset.offset() + extra);
63   }
64 
ToImmediate(InstructionOperand * operand)65   Immediate ToImmediate(InstructionOperand* operand) {
66     Constant constant = ToConstant(operand);
67     if (constant.type() == Constant::kInt32 &&
68         RelocInfo::IsWasmReference(constant.rmode())) {
69       return Immediate(static_cast<Address>(constant.ToInt32()),
70                        constant.rmode());
71     }
72     switch (constant.type()) {
73       case Constant::kInt32:
74         return Immediate(constant.ToInt32());
75       case Constant::kFloat32:
76         return Immediate::EmbeddedNumber(constant.ToFloat32());
77       case Constant::kFloat64:
78         return Immediate::EmbeddedNumber(constant.ToFloat64().value());
79       case Constant::kExternalReference:
80         return Immediate(constant.ToExternalReference());
81       case Constant::kHeapObject:
82         return Immediate(constant.ToHeapObject());
83       case Constant::kInt64:
84         break;
85       case Constant::kRpoNumber:
86         return Immediate::CodeRelativeOffset(ToLabel(operand));
87     }
88     UNREACHABLE();
89   }
90 
NextOffset(size_t * offset)91   static size_t NextOffset(size_t* offset) {
92     size_t i = *offset;
93     (*offset)++;
94     return i;
95   }
96 
ScaleFor(AddressingMode one,AddressingMode mode)97   static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
98     STATIC_ASSERT(0 == static_cast<int>(times_1));
99     STATIC_ASSERT(1 == static_cast<int>(times_2));
100     STATIC_ASSERT(2 == static_cast<int>(times_4));
101     STATIC_ASSERT(3 == static_cast<int>(times_8));
102     int scale = static_cast<int>(mode - one);
103     DCHECK(scale >= 0 && scale < 4);
104     return static_cast<ScaleFactor>(scale);
105   }
106 
MemoryOperand(size_t * offset)107   Operand MemoryOperand(size_t* offset) {
108     AddressingMode mode = AddressingModeField::decode(instr_->opcode());
109     switch (mode) {
110       case kMode_MR: {
111         Register base = InputRegister(NextOffset(offset));
112         int32_t disp = 0;
113         return Operand(base, disp);
114       }
115       case kMode_MRI: {
116         Register base = InputRegister(NextOffset(offset));
117         Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
118         return Operand(base, ctant.ToInt32(), ctant.rmode());
119       }
120       case kMode_MR1:
121       case kMode_MR2:
122       case kMode_MR4:
123       case kMode_MR8: {
124         Register base = InputRegister(NextOffset(offset));
125         Register index = InputRegister(NextOffset(offset));
126         ScaleFactor scale = ScaleFor(kMode_MR1, mode);
127         int32_t disp = 0;
128         return Operand(base, index, scale, disp);
129       }
130       case kMode_MR1I:
131       case kMode_MR2I:
132       case kMode_MR4I:
133       case kMode_MR8I: {
134         Register base = InputRegister(NextOffset(offset));
135         Register index = InputRegister(NextOffset(offset));
136         ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
137         Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
138         return Operand(base, index, scale, ctant.ToInt32(), ctant.rmode());
139       }
140       case kMode_M1:
141       case kMode_M2:
142       case kMode_M4:
143       case kMode_M8: {
144         Register index = InputRegister(NextOffset(offset));
145         ScaleFactor scale = ScaleFor(kMode_M1, mode);
146         int32_t disp = 0;
147         return Operand(index, scale, disp);
148       }
149       case kMode_M1I:
150       case kMode_M2I:
151       case kMode_M4I:
152       case kMode_M8I: {
153         Register index = InputRegister(NextOffset(offset));
154         ScaleFactor scale = ScaleFor(kMode_M1I, mode);
155         Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
156         return Operand(index, scale, ctant.ToInt32(), ctant.rmode());
157       }
158       case kMode_MI: {
159         Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
160         return Operand(ctant.ToInt32(), ctant.rmode());
161       }
162       case kMode_None:
163         UNREACHABLE();
164     }
165     UNREACHABLE();
166   }
167 
MemoryOperand(size_t first_input=0)168   Operand MemoryOperand(size_t first_input = 0) {
169     return MemoryOperand(&first_input);
170   }
171 
NextMemoryOperand(size_t offset=0)172   Operand NextMemoryOperand(size_t offset = 0) {
173     AddressingMode mode = AddressingModeField::decode(instr_->opcode());
174     Register base = InputRegister(NextOffset(&offset));
175     const int32_t disp = 4;
176     if (mode == kMode_MR1) {
177       Register index = InputRegister(NextOffset(&offset));
178       ScaleFactor scale = ScaleFor(kMode_MR1, kMode_MR1);
179       return Operand(base, index, scale, disp);
180     } else if (mode == kMode_MRI) {
181       Constant ctant = ToConstant(instr_->InputAt(NextOffset(&offset)));
182       return Operand(base, ctant.ToInt32() + disp, ctant.rmode());
183     } else {
184       UNREACHABLE();
185     }
186   }
187 };
188 
189 
190 namespace {
191 
HasImmediateInput(Instruction * instr,size_t index)192 bool HasImmediateInput(Instruction* instr, size_t index) {
193   return instr->InputAt(index)->IsImmediate();
194 }
195 
196 class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
197  public:
OutOfLineLoadFloat32NaN(CodeGenerator * gen,XMMRegister result)198   OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
199       : OutOfLineCode(gen), result_(result) {}
200 
Generate()201   void Generate() final {
202     __ xorps(result_, result_);
203     __ divss(result_, result_);
204   }
205 
206  private:
207   XMMRegister const result_;
208 };
209 
210 class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
211  public:
OutOfLineLoadFloat64NaN(CodeGenerator * gen,XMMRegister result)212   OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
213       : OutOfLineCode(gen), result_(result) {}
214 
Generate()215   void Generate() final {
216     __ xorpd(result_, result_);
217     __ divsd(result_, result_);
218   }
219 
220  private:
221   XMMRegister const result_;
222 };
223 
224 class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
225  public:
OutOfLineTruncateDoubleToI(CodeGenerator * gen,Register result,XMMRegister input,StubCallMode stub_mode)226   OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
227                              XMMRegister input, StubCallMode stub_mode)
228       : OutOfLineCode(gen),
229         result_(result),
230         input_(input),
231         stub_mode_(stub_mode),
232         isolate_(gen->isolate()),
233         zone_(gen->zone()) {}
234 
Generate()235   void Generate() final {
236     __ sub(esp, Immediate(kDoubleSize));
237     __ movsd(MemOperand(esp, 0), input_);
238     if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
239       // A direct call to a wasm runtime stub defined in this module.
240       // Just encode the stub index. This will be patched at relocation.
241       __ wasm_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
242     } else {
243       __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
244     }
245     __ mov(result_, MemOperand(esp, 0));
246     __ add(esp, Immediate(kDoubleSize));
247   }
248 
249  private:
250   Register const result_;
251   XMMRegister const input_;
252   StubCallMode stub_mode_;
253   Isolate* isolate_;
254   Zone* zone_;
255 };
256 
257 
258 class OutOfLineRecordWrite final : public OutOfLineCode {
259  public:
OutOfLineRecordWrite(CodeGenerator * gen,Register object,Operand operand,Register value,Register scratch0,Register scratch1,RecordWriteMode mode)260   OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
261                        Register value, Register scratch0, Register scratch1,
262                        RecordWriteMode mode)
263       : OutOfLineCode(gen),
264         object_(object),
265         operand_(operand),
266         value_(value),
267         scratch0_(scratch0),
268         scratch1_(scratch1),
269         mode_(mode),
270         zone_(gen->zone()) {}
271 
SaveRegisters(RegList registers)272   void SaveRegisters(RegList registers) {
273     DCHECK_LT(0, NumRegs(registers));
274     for (int i = 0; i < Register::kNumRegisters; ++i) {
275       if ((registers >> i) & 1u) {
276         __ push(Register::from_code(i));
277       }
278     }
279   }
280 
RestoreRegisters(RegList registers)281   void RestoreRegisters(RegList registers) {
282     DCHECK_LT(0, NumRegs(registers));
283     for (int i = Register::kNumRegisters - 1; i >= 0; --i) {
284       if ((registers >> i) & 1u) {
285         __ pop(Register::from_code(i));
286       }
287     }
288   }
289 
Generate()290   void Generate() final {
291     if (mode_ > RecordWriteMode::kValueIsPointer) {
292       __ JumpIfSmi(value_, exit());
293     }
294     __ CheckPageFlag(value_, scratch0_,
295                      MemoryChunk::kPointersToHereAreInterestingMask, zero,
296                      exit());
297     __ lea(scratch1_, operand_);
298     RememberedSetAction const remembered_set_action =
299         mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
300                                              : OMIT_REMEMBERED_SET;
301     SaveFPRegsMode const save_fp_mode =
302         frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
303     __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
304                            save_fp_mode);
305   }
306 
307  private:
308   Register const object_;
309   Operand const operand_;
310   Register const value_;
311   Register const scratch0_;
312   Register const scratch1_;
313   RecordWriteMode const mode_;
314   Zone* zone_;
315 };
316 
MoveOperandIfAliasedWithPoisonRegister(Instruction * call_instruction,CodeGenerator * gen)317 void MoveOperandIfAliasedWithPoisonRegister(Instruction* call_instruction,
318                                             CodeGenerator* gen) {
319   IA32OperandConverter i(gen, call_instruction);
320   int const poison_index = i.InputInt32(1);
321   if (poison_index == -1) {
322     // No aliasing -> nothing to move.
323     return;
324   }
325 
326   InstructionOperand* op = call_instruction->InputAt(poison_index);
327   if (op->IsImmediate() || op->IsConstant()) {
328     gen->tasm()->mov(kSpeculationPoisonRegister, i.ToImmediate(op));
329   } else {
330     gen->tasm()->mov(kSpeculationPoisonRegister, i.InputOperand(poison_index));
331   }
332 }
333 
EmitWordLoadPoisoningIfNeeded(CodeGenerator * codegen,InstructionCode opcode,Instruction * instr,IA32OperandConverter & i)334 void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
335                                    InstructionCode opcode, Instruction* instr,
336                                    IA32OperandConverter& i) {
337   const MemoryAccessMode access_mode =
338       static_cast<MemoryAccessMode>(MiscField::decode(opcode));
339   if (access_mode == kMemoryAccessPoisoned) {
340     Register value = i.OutputRegister();
341     codegen->tasm()->and_(value, kSpeculationPoisonRegister);
342   }
343 }
344 
345 }  // namespace
346 
// Emits |asm_instr| as a two-operand compare. With an addressing mode the
// left side is a memory operand and the right side the input following it
// (immediate or register). Without one, inputs 0 and 1 are compared directly.
#define ASSEMBLE_COMPARE(asm_instr)                                   \
  do {                                                                \
    if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
      size_t next_input = 0;                                          \
      Operand lhs = i.MemoryOperand(&next_input);                     \
      if (HasImmediateInput(instr, next_input)) {                     \
        __ asm_instr(lhs, i.InputImmediate(next_input));              \
      } else {                                                        \
        __ asm_instr(lhs, i.InputRegister(next_input));               \
      }                                                               \
    } else if (HasImmediateInput(instr, 1)) {                         \
      if (instr->InputAt(0)->IsRegister()) {                          \
        __ asm_instr(i.InputRegister(0), i.InputImmediate(1));        \
      } else {                                                        \
        __ asm_instr(i.InputOperand(0), i.InputImmediate(1));         \
      }                                                               \
    } else if (instr->InputAt(1)->IsRegister()) {                     \
      __ asm_instr(i.InputRegister(0), i.InputRegister(1));           \
    } else {                                                          \
      __ asm_instr(i.InputRegister(0), i.InputOperand(1));            \
    }                                                                 \
  } while (false)
373 
// Calls the C function ieee754_<name>_function with double inputs 0 and 1 and
// moves its result into the output double register.
#define ASSEMBLE_IEEE754_BINOP(name)                                     \
  do {                                                                   \
    /* Both doubles are handed over on the stack. */                     \
    __ PrepareCallCFunction(4, eax);                                     \
    __ movsd(Operand(esp, 0), i.InputDoubleRegister(0));                 \
    __ movsd(Operand(esp, kDoubleSize), i.InputDoubleRegister(1));       \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 4); \
    /* On ia32 the result comes back in st(0); bounce it through a */    \
    /* temporary stack slot to reach the XMM output register. */         \
    __ sub(esp, Immediate(kDoubleSize));                                 \
    __ fstp_d(Operand(esp, 0));                                          \
    __ movsd(i.OutputDoubleRegister(), Operand(esp, 0));                 \
    __ add(esp, Immediate(kDoubleSize));                                 \
  } while (false)
388 
// Calls the C function ieee754_<name>_function with double input 0 and moves
// its result into the output double register.
#define ASSEMBLE_IEEE754_UNOP(name)                                      \
  do {                                                                   \
    /* The single double is handed over on the stack. */                 \
    __ PrepareCallCFunction(2, eax);                                     \
    __ movsd(Operand(esp, 0), i.InputDoubleRegister(0));                 \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
    /* On ia32 the result comes back in st(0); bounce it through a */    \
    /* temporary stack slot to reach the XMM output register. */         \
    __ sub(esp, Immediate(kDoubleSize));                                 \
    __ fstp_d(Operand(esp, 0));                                          \
    __ movsd(i.OutputDoubleRegister(), Operand(esp, 0));                 \
    __ add(esp, Immediate(kDoubleSize));                                 \
  } while (false)
402 
// Emits |asm_instr| as a binary operation on input 0. The right-hand side is
// a memory operand (when an addressing mode is set, decoded from input 1
// onwards), an immediate, or a generic operand.
#define ASSEMBLE_BINOP(asm_instr)                                     \
  do {                                                                \
    if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
      size_t next_input = 1;                                          \
      Operand rhs = i.MemoryOperand(&next_input);                     \
      __ asm_instr(i.InputRegister(0), rhs);                          \
    } else if (HasImmediateInput(instr, 1)) {                         \
      __ asm_instr(i.InputOperand(0), i.InputImmediate(1));           \
    } else {                                                          \
      __ asm_instr(i.InputRegister(0), i.InputOperand(1));            \
    }                                                                 \
  } while (false)
417 
// Compare-and-swap retry loop: load the current memory value into eax, apply
// |bin_inst| with input 0 to a copy held in temp register 0, then try to
// publish the copy with a locked |cmpxchg_inst|; repeat until it succeeds.
#define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
  do {                                                          \
    Register updated = i.TempRegister(0);                       \
    Operand location = i.MemoryOperand(1);                      \
    Label retry;                                                \
    __ bind(&retry);                                            \
    __ mov_inst(eax, location);                                 \
    __ Move(updated, eax);                                      \
    __ bin_inst(updated, i.InputRegister(0));                   \
    __ lock();                                                  \
    __ cmpxchg_inst(location, updated);                         \
    __ j(not_equal, &retry);                                    \
  } while (false)
429 
// Retry loop for a two-word atomic read-modify-write built on cmpxchg8b. The
// current memory contents are loaded into the two output registers, inputs 0
// and 1 are combined with them via |instr1| / |instr2|, and a locked cmpxchg8b
// attempts the store. Inputs 0 and 1 are saved around the operation and
// restored before the retry branch tests the cmpxchg8b result.
// NOTE(review): the expansion ends in ';' (unlike the other macros here);
// kept as is since call sites outside this view may depend on it.
#define ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2) \
  do {                                           \
    Register in0 = i.InputRegister(0);           \
    Register in1 = i.InputRegister(1);           \
    Register out0 = i.OutputRegister(0);         \
    Register out1 = i.OutputRegister(1);         \
    Label retry;                                 \
    __ bind(&retry);                             \
    __ mov(out0, i.MemoryOperand(2));            \
    __ mov(out1, i.NextMemoryOperand(2));        \
    __ push(in0);                                \
    __ push(in1);                                \
    __ instr1(in0, out0);                        \
    __ instr2(in1, out1);                        \
    __ lock();                                   \
    __ cmpxchg8b(i.MemoryOperand(2));            \
    __ pop(in1);                                 \
    __ pop(in0);                                 \
    __ j(not_equal, &retry);                     \
  } while (false);
446 
// Emits |mov_instr| into the output register from, in order of preference:
// the decoded memory operand (if an addressing mode is set), input 0 as a
// register, or input 0 as a generic operand.
#define ASSEMBLE_MOVX(mov_instr)                  \
  do {                                            \
    Register dst = i.OutputRegister();            \
    if (instr->addressing_mode() != kMode_None) { \
      __ mov_instr(dst, i.MemoryOperand());       \
    } else if (instr->InputAt(0)->IsRegister()) { \
      __ mov_instr(dst, i.InputRegister(0));      \
    } else {                                      \
      __ mov_instr(dst, i.InputOperand(0));       \
    }                                             \
  } while (false)
457 
// Emits the unpack-style shuffle |opcode|. The second source is input 1 when
// the instruction has exactly two inputs and input 0 otherwise. With AVX the
// three-operand v<opcode> form is used; without it the destination must
// already equal the first source.
#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)                        \
  do {                                                              \
    XMMRegister lhs = i.InputSimd128Register(0);                    \
    Operand rhs = i.InputOperand(instr->InputCount() == 2 ? 1 : 0); \
    XMMRegister dst = i.OutputSimd128Register();                    \
    if (CpuFeatures::IsSupported(AVX)) {                            \
      CpuFeatureScope avx_scope(tasm(), AVX);                       \
      __ v##opcode(dst, lhs, rhs);                                  \
    } else {                                                        \
      DCHECK_EQ(dst, lhs);                                          \
      __ opcode(dst, rhs);                                          \
    }                                                               \
  } while (false)
470 
// Emits the immediate-controlled shuffle |opcode|. Without AVX the
// two-operand form is used under feature level |SSELevel| and the destination
// must equal input 0; with AVX the three-operand v<opcode> form is used.
// Note: expands to a bare if/else statement, not a do/while block.
#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm)               \
  if (!CpuFeatures::IsSupported(AVX)) {                                \
    CpuFeatureScope sse_scope(tasm(), SSELevel);                       \
    DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));   \
    __ opcode(i.OutputSimd128Register(), i.InputOperand(1), imm);      \
  } else {                                                             \
    CpuFeatureScope avx_scope(tasm(), AVX);                            \
    __ v##opcode(i.OutputSimd128Register(), i.InputSimd128Register(0), \
                 i.InputOperand(1), imm);                              \
  }
481 
AssembleDeconstructFrame()482 void CodeGenerator::AssembleDeconstructFrame() {
483   __ mov(esp, ebp);
484   __ pop(ebp);
485 }
486 
AssemblePrepareTailCall()487 void CodeGenerator::AssemblePrepareTailCall() {
488   if (frame_access_state()->has_frame()) {
489     __ mov(ebp, MemOperand(ebp, 0));
490   }
491   frame_access_state()->SetFrameAccessToSP();
492 }
493 
AssemblePopArgumentsAdaptorFrame(Register args_reg,Register,Register,Register)494 void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
495                                                      Register, Register,
496                                                      Register) {
497   // There are not enough temp registers left on ia32 for a call instruction
498   // so we pick some scratch registers and save/restore them manually here.
499   int scratch_count = 3;
500   Register scratch1 = ebx;
501   Register scratch2 = ecx;
502   Register scratch3 = edx;
503   DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
504   Label done;
505 
506   // Check if current frame is an arguments adaptor frame.
507   __ cmp(Operand(ebp, StandardFrameConstants::kContextOffset),
508          Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
509   __ j(not_equal, &done, Label::kNear);
510 
511   __ push(scratch1);
512   __ push(scratch2);
513   __ push(scratch3);
514 
515   // Load arguments count from current arguments adaptor frame (note, it
516   // does not include receiver).
517   Register caller_args_count_reg = scratch1;
518   __ mov(caller_args_count_reg,
519          Operand(ebp, ArgumentsAdaptorFrameConstants::kLengthOffset));
520   __ SmiUntag(caller_args_count_reg);
521 
522   ParameterCount callee_args_count(args_reg);
523   __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
524                         scratch3, scratch_count);
525   __ pop(scratch3);
526   __ pop(scratch2);
527   __ pop(scratch1);
528 
529   __ bind(&done);
530 }
531 
532 namespace {
533 
AdjustStackPointerForTailCall(TurboAssembler * tasm,FrameAccessState * state,int new_slot_above_sp,bool allow_shrinkage=true)534 void AdjustStackPointerForTailCall(TurboAssembler* tasm,
535                                    FrameAccessState* state,
536                                    int new_slot_above_sp,
537                                    bool allow_shrinkage = true) {
538   int current_sp_offset = state->GetSPToFPSlotCount() +
539                           StandardFrameConstants::kFixedSlotCountAboveFp;
540   int stack_slot_delta = new_slot_above_sp - current_sp_offset;
541   if (stack_slot_delta > 0) {
542     tasm->sub(esp, Immediate(stack_slot_delta * kPointerSize));
543     state->IncreaseSPDelta(stack_slot_delta);
544   } else if (allow_shrinkage && stack_slot_delta < 0) {
545     tasm->add(esp, Immediate(-stack_slot_delta * kPointerSize));
546     state->IncreaseSPDelta(stack_slot_delta);
547   }
548 }
549 
550 }  // namespace
551 
AssembleTailCallBeforeGap(Instruction * instr,int first_unused_stack_slot)552 void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
553                                               int first_unused_stack_slot) {
554   CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
555   ZoneVector<MoveOperands*> pushes(zone());
556   GetPushCompatibleMoves(instr, flags, &pushes);
557 
558   if (!pushes.empty() &&
559       (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
560        first_unused_stack_slot)) {
561     IA32OperandConverter g(this, instr);
562     for (auto move : pushes) {
563       LocationOperand destination_location(
564           LocationOperand::cast(move->destination()));
565       InstructionOperand source(move->source());
566       AdjustStackPointerForTailCall(tasm(), frame_access_state(),
567                                     destination_location.index());
568       if (source.IsStackSlot()) {
569         LocationOperand source_location(LocationOperand::cast(source));
570         __ push(g.SlotToOperand(source_location.index()));
571       } else if (source.IsRegister()) {
572         LocationOperand source_location(LocationOperand::cast(source));
573         __ push(source_location.GetRegister());
574       } else if (source.IsImmediate()) {
575         __ push(Immediate(ImmediateOperand::cast(source).inline_value()));
576       } else {
577         // Pushes of non-scalar data types is not supported.
578         UNIMPLEMENTED();
579       }
580       frame_access_state()->IncreaseSPDelta(1);
581       move->Eliminate();
582     }
583   }
584   AdjustStackPointerForTailCall(tasm(), frame_access_state(),
585                                 first_unused_stack_slot, false);
586 }
587 
AssembleTailCallAfterGap(Instruction * instr,int first_unused_stack_slot)588 void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
589                                              int first_unused_stack_slot) {
590   AdjustStackPointerForTailCall(tasm(), frame_access_state(),
591                                 first_unused_stack_slot);
592 }
593 
594 // Check that {kJavaScriptCallCodeStartRegister} is correct.
AssembleCodeStartRegisterCheck()595 void CodeGenerator::AssembleCodeStartRegisterCheck() {
596   __ push(eax);  // Push eax so we can use it as a scratch register.
597   __ ComputeCodeStartAddress(eax);
598   __ cmp(eax, kJavaScriptCallCodeStartRegister);
599   __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
600   __ pop(eax);  // Restore eax.
601 }
602 
603 // Check if the code object is marked for deoptimization. If it is, then it
604 // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
605 // to:
606 //    1. read from memory the word that contains that bit, which can be found in
607 //       the flags in the referenced {CodeDataContainer} object;
608 //    2. test kMarkedForDeoptimizationBit in those flags; and
609 //    3. if it is not zero then it jumps to the builtin.
BailoutIfDeoptimized()610 void CodeGenerator::BailoutIfDeoptimized() {
611   int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
612   __ mov(ebx, Operand(kJavaScriptCallCodeStartRegister, offset));
613   __ test(FieldOperand(ebx, CodeDataContainer::kKindSpecificFlagsOffset),
614           Immediate(1 << Code::kMarkedForDeoptimizationBit));
615   // Ensure we're not serializing (otherwise we'd need to use an indirection to
616   // access the builtin below).
617   DCHECK(!isolate()->ShouldLoadConstantsFromRootList());
618   Handle<Code> code = isolate()->builtins()->builtin_handle(
619       Builtins::kCompileLazyDeoptimizedCode);
620   __ j(not_zero, code, RelocInfo::CODE_TARGET);
621 }
622 
GenerateSpeculationPoisonFromCodeStartRegister()623 void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
624   __ push(eax);  // Push eax so we can use it as a scratch register.
625 
626   // Set a mask which has all bits set in the normal case, but has all
627   // bits cleared if we are speculatively executing the wrong PC.
628   __ ComputeCodeStartAddress(eax);
629   __ mov(kSpeculationPoisonRegister, Immediate(0));
630   __ cmp(kJavaScriptCallCodeStartRegister, eax);
631   __ mov(eax, Immediate(-1));
632   __ cmov(equal, kSpeculationPoisonRegister, eax);
633 
634   __ pop(eax);  // Restore eax.
635 }
636 
AssembleRegisterArgumentPoisoning()637 void CodeGenerator::AssembleRegisterArgumentPoisoning() {
638   __ and_(kJSFunctionRegister, kSpeculationPoisonRegister);
639   __ and_(kContextRegister, kSpeculationPoisonRegister);
640   __ and_(esp, kSpeculationPoisonRegister);
641 }
642 
643 // Assembles an instruction after register allocation, producing machine code.
AssembleArchInstruction(Instruction * instr)644 CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
645     Instruction* instr) {
646   IA32OperandConverter i(this, instr);
647   InstructionCode opcode = instr->opcode();
648   ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
649   switch (arch_opcode) {
650     case kArchCallCodeObject: {
651       MoveOperandIfAliasedWithPoisonRegister(instr, this);
652       if (HasImmediateInput(instr, 0)) {
653         Handle<Code> code = i.InputCode(0);
654         __ call(code, RelocInfo::CODE_TARGET);
655       } else {
656         Register reg = i.InputRegister(0);
657         DCHECK_IMPLIES(
658             HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
659             reg == kJavaScriptCallCodeStartRegister);
660         __ add(reg, Immediate(Code::kHeaderSize - kHeapObjectTag));
661         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
662           __ RetpolineCall(reg);
663         } else {
664           __ call(reg);
665         }
666       }
667       RecordCallPosition(instr);
668       frame_access_state()->ClearSPDelta();
669       break;
670     }
671     case kArchCallWasmFunction: {
672       MoveOperandIfAliasedWithPoisonRegister(instr, this);
673       if (HasImmediateInput(instr, 0)) {
674         Constant constant = i.ToConstant(instr->InputAt(0));
675         Address wasm_code = static_cast<Address>(constant.ToInt32());
676         if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
677           __ wasm_call(wasm_code, constant.rmode());
678         } else {
679           if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
680             __ RetpolineCall(wasm_code, constant.rmode());
681           } else {
682             __ call(wasm_code, constant.rmode());
683           }
684         }
685       } else {
686         Register reg = i.InputRegister(0);
687         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
688           __ RetpolineCall(reg);
689         } else {
690           __ call(reg);
691         }
692       }
693       RecordCallPosition(instr);
694       frame_access_state()->ClearSPDelta();
695       break;
696     }
697     case kArchTailCallCodeObjectFromJSFunction:
698     case kArchTailCallCodeObject: {
699       MoveOperandIfAliasedWithPoisonRegister(instr, this);
700       if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
701         AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
702                                          no_reg, no_reg, no_reg);
703       }
704       if (HasImmediateInput(instr, 0)) {
705         Handle<Code> code = i.InputCode(0);
706         __ jmp(code, RelocInfo::CODE_TARGET);
707       } else {
708         Register reg = i.InputRegister(0);
709         DCHECK_IMPLIES(
710             HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
711             reg == kJavaScriptCallCodeStartRegister);
712         __ add(reg, Immediate(Code::kHeaderSize - kHeapObjectTag));
713         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
714           __ RetpolineJump(reg);
715         } else {
716           __ jmp(reg);
717         }
718       }
719       frame_access_state()->ClearSPDelta();
720       frame_access_state()->SetFrameAccessToDefault();
721       break;
722     }
723     case kArchTailCallWasm: {
724       MoveOperandIfAliasedWithPoisonRegister(instr, this);
725       if (HasImmediateInput(instr, 0)) {
726         Constant constant = i.ToConstant(instr->InputAt(0));
727         Address wasm_code = static_cast<Address>(constant.ToInt32());
728         __ jmp(wasm_code, constant.rmode());
729       } else {
730         Register reg = i.InputRegister(0);
731         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
732           __ RetpolineJump(reg);
733         } else {
734           __ jmp(reg);
735         }
736       }
737       frame_access_state()->ClearSPDelta();
738       frame_access_state()->SetFrameAccessToDefault();
739       break;
740     }
741     case kArchTailCallAddress: {
742       MoveOperandIfAliasedWithPoisonRegister(instr, this);
743       CHECK(!HasImmediateInput(instr, 0));
744       Register reg = i.InputRegister(0);
745       DCHECK_IMPLIES(
746           HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
747           reg == kJavaScriptCallCodeStartRegister);
748       if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
749         __ RetpolineJump(reg);
750       } else {
751         __ jmp(reg);
752       }
753       frame_access_state()->ClearSPDelta();
754       frame_access_state()->SetFrameAccessToDefault();
755       break;
756     }
757     case kArchCallJSFunction: {
758       MoveOperandIfAliasedWithPoisonRegister(instr, this);
759       Register func = i.InputRegister(0);
760       if (FLAG_debug_code) {
761         // Check the function's context matches the context argument.
762         __ cmp(esi, FieldOperand(func, JSFunction::kContextOffset));
763         __ Assert(equal, AbortReason::kWrongFunctionContext);
764       }
765       static_assert(kJavaScriptCallCodeStartRegister == ecx, "ABI mismatch");
766       __ mov(ecx, FieldOperand(func, JSFunction::kCodeOffset));
767       __ add(ecx, Immediate(Code::kHeaderSize - kHeapObjectTag));
768       __ call(ecx);
769       RecordCallPosition(instr);
770       frame_access_state()->ClearSPDelta();
771       break;
772     }
773     case kArchPrepareCallCFunction: {
774       // Frame alignment requires using FP-relative frame addressing.
775       frame_access_state()->SetFrameAccessToFP();
776       int const num_parameters = MiscField::decode(instr->opcode());
777       __ PrepareCallCFunction(num_parameters, i.TempRegister(0));
778       break;
779     }
780     case kArchSaveCallerRegisters: {
781       fp_mode_ =
782           static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
783       DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
784       // kReturnRegister0 should have been saved before entering the stub.
785       int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
786       DCHECK_EQ(0, bytes % kPointerSize);
787       DCHECK_EQ(0, frame_access_state()->sp_delta());
788       frame_access_state()->IncreaseSPDelta(bytes / kPointerSize);
789       DCHECK(!caller_registers_saved_);
790       caller_registers_saved_ = true;
791       break;
792     }
793     case kArchRestoreCallerRegisters: {
794       DCHECK(fp_mode_ ==
795              static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
796       DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
797       // Don't overwrite the returned value.
798       int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
799       frame_access_state()->IncreaseSPDelta(-(bytes / kPointerSize));
800       DCHECK_EQ(0, frame_access_state()->sp_delta());
801       DCHECK(caller_registers_saved_);
802       caller_registers_saved_ = false;
803       break;
804     }
805     case kArchPrepareTailCall:
806       AssemblePrepareTailCall();
807       break;
808     case kArchCallCFunction: {
809       MoveOperandIfAliasedWithPoisonRegister(instr, this);
810       int const num_parameters = MiscField::decode(instr->opcode());
811       if (HasImmediateInput(instr, 0)) {
812         ExternalReference ref = i.InputExternalReference(0);
813         __ CallCFunction(ref, num_parameters);
814       } else {
815         Register func = i.InputRegister(0);
816         __ CallCFunction(func, num_parameters);
817       }
818       frame_access_state()->SetFrameAccessToDefault();
819       // Ideally, we should decrement SP delta to match the change of stack
820       // pointer in CallCFunction. However, for certain architectures (e.g.
821       // ARM), there may be a stricter alignment requirement, causing old SP
822       // to be saved on the stack. In those cases, we cannot calculate the SP
823       // delta statically.
824       frame_access_state()->ClearSPDelta();
825       if (caller_registers_saved_) {
826         // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
827         // Here, we assume the sequence to be:
828         //   kArchSaveCallerRegisters;
829         //   kArchCallCFunction;
830         //   kArchRestoreCallerRegisters;
831         int bytes =
832             __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
833         frame_access_state()->IncreaseSPDelta(bytes / kPointerSize);
834       }
835       break;
836     }
837     case kArchJmp:
838       AssembleArchJump(i.InputRpo(0));
839       break;
840     case kArchBinarySearchSwitch:
841       AssembleArchBinarySearchSwitch(instr);
842       break;
843     case kArchLookupSwitch:
844       AssembleArchLookupSwitch(instr);
845       break;
846     case kArchTableSwitch:
847       AssembleArchTableSwitch(instr);
848       break;
849     case kArchComment:
850       __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0)));
851       break;
852     case kArchDebugAbort:
853       DCHECK(i.InputRegister(0) == edx);
854       if (!frame_access_state()->has_frame()) {
855         // We don't actually want to generate a pile of code for this, so just
856         // claim there is a stack frame, without generating one.
857         FrameScope scope(tasm(), StackFrame::NONE);
858         __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
859                 RelocInfo::CODE_TARGET);
860       } else {
861         __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
862                 RelocInfo::CODE_TARGET);
863       }
864       __ int3();
865       break;
866     case kArchDebugBreak:
867       __ int3();
868       break;
869     case kArchNop:
870     case kArchThrowTerminator:
871       // don't emit code for nops.
872       break;
873     case kArchDeoptimize: {
874       int deopt_state_id =
875           BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
876       CodeGenResult result =
877           AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
878       if (result != kSuccess) return result;
879       break;
880     }
881     case kArchRet:
882       AssembleReturn(instr->InputAt(0));
883       break;
884     case kArchStackPointer:
885       __ mov(i.OutputRegister(), esp);
886       break;
887     case kArchFramePointer:
888       __ mov(i.OutputRegister(), ebp);
889       break;
890     case kArchParentFramePointer:
891       if (frame_access_state()->has_frame()) {
892         __ mov(i.OutputRegister(), Operand(ebp, 0));
893       } else {
894         __ mov(i.OutputRegister(), ebp);
895       }
896       break;
897     case kArchTruncateDoubleToI: {
898       auto result = i.OutputRegister();
899       auto input = i.InputDoubleRegister(0);
900       auto ool = new (zone()) OutOfLineTruncateDoubleToI(
901           this, result, input, DetermineStubCallMode());
902       __ cvttsd2si(result, Operand(input));
903       __ cmp(result, 1);
904       __ j(overflow, ool->entry());
905       __ bind(ool->exit());
906       break;
907     }
908     case kArchStoreWithWriteBarrier: {
909       RecordWriteMode mode =
910           static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
911       Register object = i.InputRegister(0);
912       size_t index = 0;
913       Operand operand = i.MemoryOperand(&index);
914       Register value = i.InputRegister(index);
915       Register scratch0 = i.TempRegister(0);
916       Register scratch1 = i.TempRegister(1);
917       auto ool = new (zone()) OutOfLineRecordWrite(this, object, operand, value,
918                                                    scratch0, scratch1, mode);
919       __ mov(operand, value);
920       __ CheckPageFlag(object, scratch0,
921                        MemoryChunk::kPointersFromHereAreInterestingMask,
922                        not_zero, ool->entry());
923       __ bind(ool->exit());
924       break;
925     }
926     case kArchStackSlot: {
927       FrameOffset offset =
928           frame_access_state()->GetFrameOffset(i.InputInt32(0));
929       Register base = offset.from_stack_pointer() ? esp : ebp;
930       __ lea(i.OutputRegister(), Operand(base, offset.offset()));
931       break;
932     }
933     case kIeee754Float64Acos:
934       ASSEMBLE_IEEE754_UNOP(acos);
935       break;
936     case kIeee754Float64Acosh:
937       ASSEMBLE_IEEE754_UNOP(acosh);
938       break;
939     case kIeee754Float64Asin:
940       ASSEMBLE_IEEE754_UNOP(asin);
941       break;
942     case kIeee754Float64Asinh:
943       ASSEMBLE_IEEE754_UNOP(asinh);
944       break;
945     case kIeee754Float64Atan:
946       ASSEMBLE_IEEE754_UNOP(atan);
947       break;
948     case kIeee754Float64Atanh:
949       ASSEMBLE_IEEE754_UNOP(atanh);
950       break;
951     case kIeee754Float64Atan2:
952       ASSEMBLE_IEEE754_BINOP(atan2);
953       break;
954     case kIeee754Float64Cbrt:
955       ASSEMBLE_IEEE754_UNOP(cbrt);
956       break;
957     case kIeee754Float64Cos:
958       ASSEMBLE_IEEE754_UNOP(cos);
959       break;
960     case kIeee754Float64Cosh:
961       ASSEMBLE_IEEE754_UNOP(cosh);
962       break;
963     case kIeee754Float64Expm1:
964       ASSEMBLE_IEEE754_UNOP(expm1);
965       break;
966     case kIeee754Float64Exp:
967       ASSEMBLE_IEEE754_UNOP(exp);
968       break;
969     case kIeee754Float64Log:
970       ASSEMBLE_IEEE754_UNOP(log);
971       break;
972     case kIeee754Float64Log1p:
973       ASSEMBLE_IEEE754_UNOP(log1p);
974       break;
975     case kIeee754Float64Log2:
976       ASSEMBLE_IEEE754_UNOP(log2);
977       break;
978     case kIeee754Float64Log10:
979       ASSEMBLE_IEEE754_UNOP(log10);
980       break;
981     case kIeee754Float64Pow: {
982       // TODO(bmeurer): Improve integration of the stub.
983       if (i.InputDoubleRegister(1) != xmm2) {
984         __ movaps(xmm2, i.InputDoubleRegister(0));
985         __ movaps(xmm1, i.InputDoubleRegister(1));
986       } else {
987         __ movaps(xmm0, i.InputDoubleRegister(0));
988         __ movaps(xmm1, xmm2);
989         __ movaps(xmm2, xmm0);
990       }
991       __ Call(BUILTIN_CODE(isolate(), MathPowInternal), RelocInfo::CODE_TARGET);
992       __ movaps(i.OutputDoubleRegister(), xmm3);
993       break;
994     }
995     case kIeee754Float64Sin:
996       ASSEMBLE_IEEE754_UNOP(sin);
997       break;
998     case kIeee754Float64Sinh:
999       ASSEMBLE_IEEE754_UNOP(sinh);
1000       break;
1001     case kIeee754Float64Tan:
1002       ASSEMBLE_IEEE754_UNOP(tan);
1003       break;
1004     case kIeee754Float64Tanh:
1005       ASSEMBLE_IEEE754_UNOP(tanh);
1006       break;
1007     case kIA32Add:
1008       ASSEMBLE_BINOP(add);
1009       break;
1010     case kIA32And:
1011       ASSEMBLE_BINOP(and_);
1012       break;
1013     case kIA32Cmp:
1014       ASSEMBLE_COMPARE(cmp);
1015       break;
1016     case kIA32Cmp16:
1017       ASSEMBLE_COMPARE(cmpw);
1018       break;
1019     case kIA32Cmp8:
1020       ASSEMBLE_COMPARE(cmpb);
1021       break;
1022     case kIA32Test:
1023       ASSEMBLE_COMPARE(test);
1024       break;
1025     case kIA32Test16:
1026       ASSEMBLE_COMPARE(test_w);
1027       break;
1028     case kIA32Test8:
1029       ASSEMBLE_COMPARE(test_b);
1030       break;
1031     case kIA32Imul:
1032       if (HasImmediateInput(instr, 1)) {
1033         __ imul(i.OutputRegister(), i.InputOperand(0), i.InputInt32(1));
1034       } else {
1035         __ imul(i.OutputRegister(), i.InputOperand(1));
1036       }
1037       break;
1038     case kIA32ImulHigh:
1039       __ imul(i.InputRegister(1));
1040       break;
1041     case kIA32UmulHigh:
1042       __ mul(i.InputRegister(1));
1043       break;
1044     case kIA32Idiv:
1045       __ cdq();
1046       __ idiv(i.InputOperand(1));
1047       break;
1048     case kIA32Udiv:
1049       __ Move(edx, Immediate(0));
1050       __ div(i.InputOperand(1));
1051       break;
1052     case kIA32Not:
1053       __ not_(i.OutputOperand());
1054       break;
1055     case kIA32Neg:
1056       __ neg(i.OutputOperand());
1057       break;
1058     case kIA32Or:
1059       ASSEMBLE_BINOP(or_);
1060       break;
1061     case kIA32Xor:
1062       ASSEMBLE_BINOP(xor_);
1063       break;
1064     case kIA32Sub:
1065       ASSEMBLE_BINOP(sub);
1066       break;
1067     case kIA32Shl:
1068       if (HasImmediateInput(instr, 1)) {
1069         __ shl(i.OutputOperand(), i.InputInt5(1));
1070       } else {
1071         __ shl_cl(i.OutputOperand());
1072       }
1073       break;
1074     case kIA32Shr:
1075       if (HasImmediateInput(instr, 1)) {
1076         __ shr(i.OutputOperand(), i.InputInt5(1));
1077       } else {
1078         __ shr_cl(i.OutputOperand());
1079       }
1080       break;
1081     case kIA32Sar:
1082       if (HasImmediateInput(instr, 1)) {
1083         __ sar(i.OutputOperand(), i.InputInt5(1));
1084       } else {
1085         __ sar_cl(i.OutputOperand());
1086       }
1087       break;
1088     case kIA32AddPair: {
1089       // i.OutputRegister(0) == i.InputRegister(0) ... left low word.
1090       // i.InputRegister(1) ... left high word.
1091       // i.InputRegister(2) ... right low word.
1092       // i.InputRegister(3) ... right high word.
1093       bool use_temp = false;
1094       if (i.OutputRegister(0).code() == i.InputRegister(1).code() ||
1095           i.OutputRegister(0).code() == i.InputRegister(3).code()) {
1096         // We cannot write to the output register directly, because it would
1097         // overwrite an input for adc. We have to use the temp register.
1098         use_temp = true;
1099         __ Move(i.TempRegister(0), i.InputRegister(0));
1100         __ add(i.TempRegister(0), i.InputRegister(2));
1101       } else {
1102         __ add(i.OutputRegister(0), i.InputRegister(2));
1103       }
1104       if (i.OutputRegister(1).code() != i.InputRegister(1).code()) {
1105         __ Move(i.OutputRegister(1), i.InputRegister(1));
1106       }
1107       __ adc(i.OutputRegister(1), Operand(i.InputRegister(3)));
1108       if (use_temp) {
1109         __ Move(i.OutputRegister(0), i.TempRegister(0));
1110       }
1111       break;
1112     }
1113     case kIA32SubPair: {
1114       // i.OutputRegister(0) == i.InputRegister(0) ... left low word.
1115       // i.InputRegister(1) ... left high word.
1116       // i.InputRegister(2) ... right low word.
1117       // i.InputRegister(3) ... right high word.
1118       bool use_temp = false;
1119       if (i.OutputRegister(0).code() == i.InputRegister(1).code() ||
1120           i.OutputRegister(0).code() == i.InputRegister(3).code()) {
1121         // We cannot write to the output register directly, because it would
1122         // overwrite an input for sbb. We have to use the temp register.
1123         use_temp = true;
1124         __ Move(i.TempRegister(0), i.InputRegister(0));
1125         __ sub(i.TempRegister(0), i.InputRegister(2));
1126       } else {
1127         __ sub(i.OutputRegister(0), i.InputRegister(2));
1128       }
1129       if (i.OutputRegister(1).code() != i.InputRegister(1).code()) {
1130         __ Move(i.OutputRegister(1), i.InputRegister(1));
1131       }
1132       __ sbb(i.OutputRegister(1), Operand(i.InputRegister(3)));
1133       if (use_temp) {
1134         __ Move(i.OutputRegister(0), i.TempRegister(0));
1135       }
1136       break;
1137     }
1138     case kIA32MulPair: {
1139       __ imul(i.OutputRegister(1), i.InputOperand(0));
1140       __ mov(i.TempRegister(0), i.InputOperand(1));
1141       __ imul(i.TempRegister(0), i.InputOperand(2));
1142       __ add(i.OutputRegister(1), i.TempRegister(0));
1143       __ mov(i.OutputRegister(0), i.InputOperand(0));
1144       // Multiplies the low words and stores them in eax and edx.
1145       __ mul(i.InputRegister(2));
1146       __ add(i.OutputRegister(1), i.TempRegister(0));
1147 
1148       break;
1149     }
1150     case kIA32ShlPair:
1151       if (HasImmediateInput(instr, 2)) {
1152         __ ShlPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
1153       } else {
1154         // Shift has been loaded into CL by the register allocator.
1155         __ ShlPair_cl(i.InputRegister(1), i.InputRegister(0));
1156       }
1157       break;
1158     case kIA32ShrPair:
1159       if (HasImmediateInput(instr, 2)) {
1160         __ ShrPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
1161       } else {
1162         // Shift has been loaded into CL by the register allocator.
1163         __ ShrPair_cl(i.InputRegister(1), i.InputRegister(0));
1164       }
1165       break;
1166     case kIA32SarPair:
1167       if (HasImmediateInput(instr, 2)) {
1168         __ SarPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
1169       } else {
1170         // Shift has been loaded into CL by the register allocator.
1171         __ SarPair_cl(i.InputRegister(1), i.InputRegister(0));
1172       }
1173       break;
1174     case kIA32Ror:
1175       if (HasImmediateInput(instr, 1)) {
1176         __ ror(i.OutputOperand(), i.InputInt5(1));
1177       } else {
1178         __ ror_cl(i.OutputOperand());
1179       }
1180       break;
1181     case kIA32Lzcnt:
1182       __ Lzcnt(i.OutputRegister(), i.InputOperand(0));
1183       break;
1184     case kIA32Tzcnt:
1185       __ Tzcnt(i.OutputRegister(), i.InputOperand(0));
1186       break;
1187     case kIA32Popcnt:
1188       __ Popcnt(i.OutputRegister(), i.InputOperand(0));
1189       break;
1190     case kIA32Bswap:
1191       __ bswap(i.OutputRegister());
1192       break;
1193     case kArchWordPoisonOnSpeculation:
1194       DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
1195       __ and_(i.InputRegister(0), kSpeculationPoisonRegister);
1196       break;
1197     case kLFence:
1198       __ lfence();
1199       break;
1200     case kSSEFloat32Cmp:
1201       __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1202       break;
1203     case kSSEFloat32Add:
1204       __ addss(i.InputDoubleRegister(0), i.InputOperand(1));
1205       break;
1206     case kSSEFloat32Sub:
1207       __ subss(i.InputDoubleRegister(0), i.InputOperand(1));
1208       break;
1209     case kSSEFloat32Mul:
1210       __ mulss(i.InputDoubleRegister(0), i.InputOperand(1));
1211       break;
1212     case kSSEFloat32Div:
1213       __ divss(i.InputDoubleRegister(0), i.InputOperand(1));
1214       // Don't delete this mov. It may improve performance on some CPUs,
1215       // when there is a (v)mulss depending on the result.
1216       __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1217       break;
1218     case kSSEFloat32Sqrt:
1219       __ sqrtss(i.OutputDoubleRegister(), i.InputOperand(0));
1220       break;
1221     case kSSEFloat32Abs: {
1222       // TODO(bmeurer): Use 128-bit constants.
1223       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1224       __ psrlq(kScratchDoubleReg, 33);
1225       __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
1226       break;
1227     }
1228     case kSSEFloat32Neg: {
1229       // TODO(bmeurer): Use 128-bit constants.
1230       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1231       __ psllq(kScratchDoubleReg, 31);
1232       __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
1233       break;
1234     }
1235     case kSSEFloat32Round: {
1236       CpuFeatureScope sse_scope(tasm(), SSE4_1);
1237       RoundingMode const mode =
1238           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1239       __ roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1240       break;
1241     }
1242     case kSSEFloat64Cmp:
1243       __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1244       break;
1245     case kSSEFloat64Add:
1246       __ addsd(i.InputDoubleRegister(0), i.InputOperand(1));
1247       break;
1248     case kSSEFloat64Sub:
1249       __ subsd(i.InputDoubleRegister(0), i.InputOperand(1));
1250       break;
1251     case kSSEFloat64Mul:
1252       __ mulsd(i.InputDoubleRegister(0), i.InputOperand(1));
1253       break;
1254     case kSSEFloat64Div:
1255       __ divsd(i.InputDoubleRegister(0), i.InputOperand(1));
1256       // Don't delete this mov. It may improve performance on some CPUs,
1257       // when there is a (v)mulsd depending on the result.
1258       __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1259       break;
1260     case kSSEFloat32Max: {
1261       Label compare_nan, compare_swap, done_compare;
1262       if (instr->InputAt(1)->IsFPRegister()) {
1263         __ ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1264       } else {
1265         __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1266       }
1267       auto ool =
1268           new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1269       __ j(parity_even, ool->entry());
1270       __ j(above, &done_compare, Label::kNear);
1271       __ j(below, &compare_swap, Label::kNear);
1272       __ movmskps(i.TempRegister(0), i.InputDoubleRegister(0));
1273       __ test(i.TempRegister(0), Immediate(1));
1274       __ j(zero, &done_compare, Label::kNear);
1275       __ bind(&compare_swap);
1276       if (instr->InputAt(1)->IsFPRegister()) {
1277         __ movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1278       } else {
1279         __ movss(i.InputDoubleRegister(0), i.InputOperand(1));
1280       }
1281       __ bind(&done_compare);
1282       __ bind(ool->exit());
1283       break;
1284     }
1285 
1286     case kSSEFloat64Max: {
1287       Label compare_nan, compare_swap, done_compare;
1288       if (instr->InputAt(1)->IsFPRegister()) {
1289         __ ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1290       } else {
1291         __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1292       }
1293       auto ool =
1294           new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1295       __ j(parity_even, ool->entry());
1296       __ j(above, &done_compare, Label::kNear);
1297       __ j(below, &compare_swap, Label::kNear);
1298       __ movmskpd(i.TempRegister(0), i.InputDoubleRegister(0));
1299       __ test(i.TempRegister(0), Immediate(1));
1300       __ j(zero, &done_compare, Label::kNear);
1301       __ bind(&compare_swap);
1302       if (instr->InputAt(1)->IsFPRegister()) {
1303         __ movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1304       } else {
1305         __ movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1306       }
1307       __ bind(&done_compare);
1308       __ bind(ool->exit());
1309       break;
1310     }
1311     case kSSEFloat32Min: {
1312       Label compare_swap, done_compare;
1313       if (instr->InputAt(1)->IsFPRegister()) {
1314         __ ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1315       } else {
1316         __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1317       }
1318       auto ool =
1319           new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1320       __ j(parity_even, ool->entry());
1321       __ j(below, &done_compare, Label::kNear);
1322       __ j(above, &compare_swap, Label::kNear);
1323       if (instr->InputAt(1)->IsFPRegister()) {
1324         __ movmskps(i.TempRegister(0), i.InputDoubleRegister(1));
1325       } else {
1326         __ movss(kScratchDoubleReg, i.InputOperand(1));
1327         __ movmskps(i.TempRegister(0), kScratchDoubleReg);
1328       }
1329       __ test(i.TempRegister(0), Immediate(1));
1330       __ j(zero, &done_compare, Label::kNear);
1331       __ bind(&compare_swap);
1332       if (instr->InputAt(1)->IsFPRegister()) {
1333         __ movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1334       } else {
1335         __ movss(i.InputDoubleRegister(0), i.InputOperand(1));
1336       }
1337       __ bind(&done_compare);
1338       __ bind(ool->exit());
1339       break;
1340     }
1341     case kSSEFloat64Min: {
1342       Label compare_swap, done_compare;
1343       if (instr->InputAt(1)->IsFPRegister()) {
1344         __ ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1345       } else {
1346         __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1347       }
1348       auto ool =
1349           new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1350       __ j(parity_even, ool->entry());
1351       __ j(below, &done_compare, Label::kNear);
1352       __ j(above, &compare_swap, Label::kNear);
1353       if (instr->InputAt(1)->IsFPRegister()) {
1354         __ movmskpd(i.TempRegister(0), i.InputDoubleRegister(1));
1355       } else {
1356         __ movsd(kScratchDoubleReg, i.InputOperand(1));
1357         __ movmskpd(i.TempRegister(0), kScratchDoubleReg);
1358       }
1359       __ test(i.TempRegister(0), Immediate(1));
1360       __ j(zero, &done_compare, Label::kNear);
1361       __ bind(&compare_swap);
1362       if (instr->InputAt(1)->IsFPRegister()) {
1363         __ movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1364       } else {
1365         __ movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1366       }
1367       __ bind(&done_compare);
1368       __ bind(ool->exit());
1369       break;
1370     }
1371     case kSSEFloat64Mod: {
1372       // TODO(dcarney): alignment is wrong.
1373       __ sub(esp, Immediate(kDoubleSize));
1374       // Move values to st(0) and st(1).
1375       __ movsd(Operand(esp, 0), i.InputDoubleRegister(1));
1376       __ fld_d(Operand(esp, 0));
1377       __ movsd(Operand(esp, 0), i.InputDoubleRegister(0));
1378       __ fld_d(Operand(esp, 0));
1379       // Loop while fprem isn't done.
1380       Label mod_loop;
1381       __ bind(&mod_loop);
1382       // This instruction traps on all kinds of inputs, but we are assuming the
1383       // floating point control word is set to ignore them all.
1384       __ fprem();
1385       // The following 2 instructions implicitly use eax.
1386       __ fnstsw_ax();
1387       __ sahf();
1388       __ j(parity_even, &mod_loop);
1389       // Move output to stack and clean up.
1390       __ fstp(1);
1391       __ fstp_d(Operand(esp, 0));
1392       __ movsd(i.OutputDoubleRegister(), Operand(esp, 0));
1393       __ add(esp, Immediate(kDoubleSize));
1394       break;
1395     }
1396     case kSSEFloat64Abs: {
1397       // TODO(bmeurer): Use 128-bit constants.
1398       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1399       __ psrlq(kScratchDoubleReg, 1);
1400       __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1401       break;
1402     }
1403     case kSSEFloat64Neg: {
1404       // TODO(bmeurer): Use 128-bit constants.
1405       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1406       __ psllq(kScratchDoubleReg, 63);
1407       __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1408       break;
1409     }
1410     case kSSEFloat64Sqrt:
1411       __ sqrtsd(i.OutputDoubleRegister(), i.InputOperand(0));
1412       break;
1413     case kSSEFloat64Round: {
1414       CpuFeatureScope sse_scope(tasm(), SSE4_1);
1415       RoundingMode const mode =
1416           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1417       __ roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1418       break;
1419     }
1420     case kSSEFloat32ToFloat64:
1421       __ cvtss2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1422       break;
1423     case kSSEFloat64ToFloat32:
1424       __ cvtsd2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1425       break;
1426     case kSSEFloat32ToInt32:
1427       __ cvttss2si(i.OutputRegister(), i.InputOperand(0));
1428       break;
1429     case kSSEFloat32ToUint32:
1430       __ Cvttss2ui(i.OutputRegister(), i.InputOperand(0), kScratchDoubleReg);
1431       break;
1432     case kSSEFloat64ToInt32:
1433       __ cvttsd2si(i.OutputRegister(), i.InputOperand(0));
1434       break;
1435     case kSSEFloat64ToUint32:
1436       __ Cvttsd2ui(i.OutputRegister(), i.InputOperand(0), kScratchDoubleReg);
1437       break;
1438     case kSSEInt32ToFloat32:
1439       __ cvtsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1440       break;
1441     case kSSEUint32ToFloat32:
1442       __ Cvtui2ss(i.OutputDoubleRegister(), i.InputOperand(0),
1443                   i.TempRegister(0));
1444       break;
1445     case kSSEInt32ToFloat64:
1446       __ cvtsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1447       break;
1448     case kSSEUint32ToFloat64:
1449       __ Cvtui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1450       break;
1451     case kSSEFloat64ExtractLowWord32:
1452       if (instr->InputAt(0)->IsFPStackSlot()) {
1453         __ mov(i.OutputRegister(), i.InputOperand(0));
1454       } else {
1455         __ movd(i.OutputRegister(), i.InputDoubleRegister(0));
1456       }
1457       break;
1458     case kSSEFloat64ExtractHighWord32:
1459       if (instr->InputAt(0)->IsFPStackSlot()) {
1460         __ mov(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
1461       } else {
1462         __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
1463       }
1464       break;
1465     case kSSEFloat64InsertLowWord32:
1466       __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0, true);
1467       break;
1468     case kSSEFloat64InsertHighWord32:
1469       __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1, true);
1470       break;
1471     case kSSEFloat64LoadLowWord32:
1472       __ movd(i.OutputDoubleRegister(), i.InputOperand(0));
1473       break;
1474     case kAVXFloat32Add: {
1475       CpuFeatureScope avx_scope(tasm(), AVX);
1476       __ vaddss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1477                 i.InputOperand(1));
1478       break;
1479     }
1480     case kAVXFloat32Sub: {
1481       CpuFeatureScope avx_scope(tasm(), AVX);
1482       __ vsubss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1483                 i.InputOperand(1));
1484       break;
1485     }
1486     case kAVXFloat32Mul: {
1487       CpuFeatureScope avx_scope(tasm(), AVX);
1488       __ vmulss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1489                 i.InputOperand(1));
1490       break;
1491     }
1492     case kAVXFloat32Div: {
1493       CpuFeatureScope avx_scope(tasm(), AVX);
1494       __ vdivss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1495                 i.InputOperand(1));
1496       // Don't delete this mov. It may improve performance on some CPUs,
1497       // when there is a (v)mulss depending on the result.
1498       __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1499       break;
1500     }
1501     case kAVXFloat64Add: {
1502       CpuFeatureScope avx_scope(tasm(), AVX);
1503       __ vaddsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1504                 i.InputOperand(1));
1505       break;
1506     }
1507     case kAVXFloat64Sub: {
1508       CpuFeatureScope avx_scope(tasm(), AVX);
1509       __ vsubsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1510                 i.InputOperand(1));
1511       break;
1512     }
1513     case kAVXFloat64Mul: {
1514       CpuFeatureScope avx_scope(tasm(), AVX);
1515       __ vmulsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1516                 i.InputOperand(1));
1517       break;
1518     }
1519     case kAVXFloat64Div: {
1520       CpuFeatureScope avx_scope(tasm(), AVX);
1521       __ vdivsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1522                 i.InputOperand(1));
1523       // Don't delete this mov. It may improve performance on some CPUs,
1524       // when there is a (v)mulsd depending on the result.
1525       __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1526       break;
1527     }
1528     case kAVXFloat32Abs: {
1529       // TODO(bmeurer): Use RIP relative 128-bit constants.
1530       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1531       __ psrlq(kScratchDoubleReg, 33);
1532       CpuFeatureScope avx_scope(tasm(), AVX);
1533       __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0));
1534       break;
1535     }
1536     case kAVXFloat32Neg: {
1537       // TODO(bmeurer): Use RIP relative 128-bit constants.
1538       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1539       __ psllq(kScratchDoubleReg, 31);
1540       CpuFeatureScope avx_scope(tasm(), AVX);
1541       __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0));
1542       break;
1543     }
1544     case kAVXFloat64Abs: {
1545       // TODO(bmeurer): Use RIP relative 128-bit constants.
1546       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1547       __ psrlq(kScratchDoubleReg, 1);
1548       CpuFeatureScope avx_scope(tasm(), AVX);
1549       __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0));
1550       break;
1551     }
1552     case kAVXFloat64Neg: {
1553       // TODO(bmeurer): Use RIP relative 128-bit constants.
1554       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1555       __ psllq(kScratchDoubleReg, 63);
1556       CpuFeatureScope avx_scope(tasm(), AVX);
1557       __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0));
1558       break;
1559     }
1560     case kSSEFloat64SilenceNaN:
1561       __ xorpd(kScratchDoubleReg, kScratchDoubleReg);
1562       __ subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
1563       break;
1564     case kIA32Movsxbl:
1565       ASSEMBLE_MOVX(movsx_b);
1566       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1567       break;
1568     case kIA32Movzxbl:
1569       ASSEMBLE_MOVX(movzx_b);
1570       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1571       break;
1572     case kIA32Movb: {
1573       size_t index = 0;
1574       Operand operand = i.MemoryOperand(&index);
1575       if (HasImmediateInput(instr, index)) {
1576         __ mov_b(operand, i.InputInt8(index));
1577       } else {
1578         __ mov_b(operand, i.InputRegister(index));
1579       }
1580       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1581       break;
1582     }
1583     case kIA32Movsxwl:
1584       ASSEMBLE_MOVX(movsx_w);
1585       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1586       break;
1587     case kIA32Movzxwl:
1588       ASSEMBLE_MOVX(movzx_w);
1589       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1590       break;
1591     case kIA32Movw: {
1592       size_t index = 0;
1593       Operand operand = i.MemoryOperand(&index);
1594       if (HasImmediateInput(instr, index)) {
1595         __ mov_w(operand, i.InputInt16(index));
1596       } else {
1597         __ mov_w(operand, i.InputRegister(index));
1598       }
1599       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1600       break;
1601     }
1602     case kIA32Movl:
1603       if (instr->HasOutput()) {
1604         __ mov(i.OutputRegister(), i.MemoryOperand());
1605         EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1606       } else {
1607         size_t index = 0;
1608         Operand operand = i.MemoryOperand(&index);
1609         if (HasImmediateInput(instr, index)) {
1610           __ mov(operand, i.InputImmediate(index));
1611         } else {
1612           __ mov(operand, i.InputRegister(index));
1613         }
1614       }
1615       break;
1616     case kIA32Movsd:
1617       if (instr->HasOutput()) {
1618         __ movsd(i.OutputDoubleRegister(), i.MemoryOperand());
1619       } else {
1620         size_t index = 0;
1621         Operand operand = i.MemoryOperand(&index);
1622         __ movsd(operand, i.InputDoubleRegister(index));
1623       }
1624       break;
1625     case kIA32Movss:
1626       if (instr->HasOutput()) {
1627         __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
1628       } else {
1629         size_t index = 0;
1630         Operand operand = i.MemoryOperand(&index);
1631         __ movss(operand, i.InputDoubleRegister(index));
1632       }
1633       break;
1634     case kIA32Movdqu:
1635       if (instr->HasOutput()) {
1636         __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
1637       } else {
1638         size_t index = 0;
1639         Operand operand = i.MemoryOperand(&index);
1640         __ Movdqu(operand, i.InputSimd128Register(index));
1641       }
1642       break;
1643     case kIA32BitcastFI:
1644       if (instr->InputAt(0)->IsFPStackSlot()) {
1645         __ mov(i.OutputRegister(), i.InputOperand(0));
1646       } else {
1647         __ movd(i.OutputRegister(), i.InputDoubleRegister(0));
1648       }
1649       break;
1650     case kIA32BitcastIF:
1651       if (instr->InputAt(0)->IsRegister()) {
1652         __ movd(i.OutputDoubleRegister(), i.InputRegister(0));
1653       } else {
1654         __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
1655       }
1656       break;
1657     case kIA32Lea: {
1658       AddressingMode mode = AddressingModeField::decode(instr->opcode());
1659       // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
1660       // and addressing mode just happens to work out. The "addl"/"subl" forms
1661       // in these cases are faster based on measurements.
1662       if (mode == kMode_MI) {
1663         __ Move(i.OutputRegister(), Immediate(i.InputInt32(0)));
1664       } else if (i.InputRegister(0) == i.OutputRegister()) {
1665         if (mode == kMode_MRI) {
1666           int32_t constant_summand = i.InputInt32(1);
1667           if (constant_summand > 0) {
1668             __ add(i.OutputRegister(), Immediate(constant_summand));
1669           } else if (constant_summand < 0) {
1670             __ sub(i.OutputRegister(), Immediate(-constant_summand));
1671           }
1672         } else if (mode == kMode_MR1) {
1673           if (i.InputRegister(1) == i.OutputRegister()) {
1674             __ shl(i.OutputRegister(), 1);
1675           } else {
1676             __ add(i.OutputRegister(), i.InputRegister(1));
1677           }
1678         } else if (mode == kMode_M2) {
1679           __ shl(i.OutputRegister(), 1);
1680         } else if (mode == kMode_M4) {
1681           __ shl(i.OutputRegister(), 2);
1682         } else if (mode == kMode_M8) {
1683           __ shl(i.OutputRegister(), 3);
1684         } else {
1685           __ lea(i.OutputRegister(), i.MemoryOperand());
1686         }
1687       } else if (mode == kMode_MR1 &&
1688                  i.InputRegister(1) == i.OutputRegister()) {
1689         __ add(i.OutputRegister(), i.InputRegister(0));
1690       } else {
1691         __ lea(i.OutputRegister(), i.MemoryOperand());
1692       }
1693       break;
1694     }
1695     case kIA32PushFloat32:
1696       if (instr->InputAt(0)->IsFPRegister()) {
1697         __ sub(esp, Immediate(kFloatSize));
1698         __ movss(Operand(esp, 0), i.InputDoubleRegister(0));
1699         frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize);
1700       } else if (HasImmediateInput(instr, 0)) {
1701         __ Move(kScratchDoubleReg, i.InputFloat32(0));
1702         __ sub(esp, Immediate(kFloatSize));
1703         __ movss(Operand(esp, 0), kScratchDoubleReg);
1704         frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize);
1705       } else {
1706         __ movss(kScratchDoubleReg, i.InputOperand(0));
1707         __ sub(esp, Immediate(kFloatSize));
1708         __ movss(Operand(esp, 0), kScratchDoubleReg);
1709         frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize);
1710       }
1711       break;
1712     case kIA32PushFloat64:
1713       if (instr->InputAt(0)->IsFPRegister()) {
1714         __ sub(esp, Immediate(kDoubleSize));
1715         __ movsd(Operand(esp, 0), i.InputDoubleRegister(0));
1716         frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
1717       } else if (HasImmediateInput(instr, 0)) {
1718         __ Move(kScratchDoubleReg, i.InputDouble(0));
1719         __ sub(esp, Immediate(kDoubleSize));
1720         __ movsd(Operand(esp, 0), kScratchDoubleReg);
1721         frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
1722       } else {
1723         __ movsd(kScratchDoubleReg, i.InputOperand(0));
1724         __ sub(esp, Immediate(kDoubleSize));
1725         __ movsd(Operand(esp, 0), kScratchDoubleReg);
1726         frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
1727       }
1728       break;
1729     case kIA32PushSimd128:
1730       if (instr->InputAt(0)->IsFPRegister()) {
1731         __ sub(esp, Immediate(kSimd128Size));
1732         __ movups(Operand(esp, 0), i.InputSimd128Register(0));
1733       } else {
1734         __ movups(kScratchDoubleReg, i.InputOperand(0));
1735         __ sub(esp, Immediate(kSimd128Size));
1736         __ movups(Operand(esp, 0), kScratchDoubleReg);
1737       }
1738       frame_access_state()->IncreaseSPDelta(kSimd128Size / kPointerSize);
1739       break;
1740     case kIA32Push:
1741       if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
1742         size_t index = 0;
1743         Operand operand = i.MemoryOperand(&index);
1744         __ push(operand);
1745         frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize);
1746       } else if (instr->InputAt(0)->IsFPRegister()) {
1747         __ sub(esp, Immediate(kFloatSize));
1748         __ movsd(Operand(esp, 0), i.InputDoubleRegister(0));
1749         frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize);
1750       } else if (HasImmediateInput(instr, 0)) {
1751         __ push(i.InputImmediate(0));
1752         frame_access_state()->IncreaseSPDelta(1);
1753       } else {
1754         __ push(i.InputOperand(0));
1755         frame_access_state()->IncreaseSPDelta(1);
1756       }
1757       break;
1758     case kIA32Poke: {
1759       int slot = MiscField::decode(instr->opcode());
1760       if (HasImmediateInput(instr, 0)) {
1761         __ mov(Operand(esp, slot * kPointerSize), i.InputImmediate(0));
1762       } else {
1763         __ mov(Operand(esp, slot * kPointerSize), i.InputRegister(0));
1764       }
1765       break;
1766     }
1767     case kIA32Peek: {
1768       int reverse_slot = i.InputInt32(0) + 1;
1769       int offset =
1770           FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
1771       if (instr->OutputAt(0)->IsFPRegister()) {
1772         LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
1773         if (op->representation() == MachineRepresentation::kFloat64) {
1774           __ movsd(i.OutputDoubleRegister(), Operand(ebp, offset));
1775         } else {
1776           DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
1777           __ movss(i.OutputFloatRegister(), Operand(ebp, offset));
1778         }
1779       } else {
1780         __ mov(i.OutputRegister(), Operand(ebp, offset));
1781       }
1782       break;
1783     }
1784     case kSSEF32x4Splat: {
1785       DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1786       XMMRegister dst = i.OutputSimd128Register();
1787       __ shufps(dst, dst, 0x0);
1788       break;
1789     }
1790     case kAVXF32x4Splat: {
1791       CpuFeatureScope avx_scope(tasm(), AVX);
1792       XMMRegister src = i.InputFloatRegister(0);
1793       __ vshufps(i.OutputSimd128Register(), src, src, 0x0);
1794       break;
1795     }
1796     case kSSEF32x4ExtractLane: {
1797       DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1798       XMMRegister dst = i.OutputFloatRegister();
1799       int8_t lane = i.InputInt8(1);
1800       if (lane != 0) {
1801         DCHECK_LT(lane, 4);
1802         __ shufps(dst, dst, lane);
1803       }
1804       break;
1805     }
1806     case kAVXF32x4ExtractLane: {
1807       CpuFeatureScope avx_scope(tasm(), AVX);
1808       XMMRegister dst = i.OutputFloatRegister();
1809       XMMRegister src = i.InputSimd128Register(0);
1810       int8_t lane = i.InputInt8(1);
1811       if (lane == 0) {
1812         if (dst != src) __ vmovaps(dst, src);
1813       } else {
1814         DCHECK_LT(lane, 4);
1815         __ vshufps(dst, src, src, lane);
1816       }
1817       break;
1818     }
1819     case kSSEF32x4ReplaceLane: {
1820       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
1821       CpuFeatureScope sse_scope(tasm(), SSE4_1);
1822       __ insertps(i.OutputSimd128Register(), i.InputOperand(2),
1823                   i.InputInt8(1) << 4);
1824       break;
1825     }
1826     case kAVXF32x4ReplaceLane: {
1827       CpuFeatureScope avx_scope(tasm(), AVX);
1828       __ vinsertps(i.OutputSimd128Register(), i.InputSimd128Register(0),
1829                    i.InputOperand(2), i.InputInt8(1) << 4);
1830       break;
1831     }
1832     case kIA32F32x4SConvertI32x4: {
1833       __ Cvtdq2ps(i.OutputSimd128Register(), i.InputOperand(0));
1834       break;
1835     }
1836     case kSSEF32x4UConvertI32x4: {
1837       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
1838       CpuFeatureScope sse_scope(tasm(), SSE4_1);
1839       XMMRegister dst = i.OutputSimd128Register();
1840       __ pxor(kScratchDoubleReg, kScratchDoubleReg);      // zeros
1841       __ pblendw(kScratchDoubleReg, dst, 0x55);           // get lo 16 bits
1842       __ psubd(dst, kScratchDoubleReg);                   // get hi 16 bits
1843       __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
1844       __ psrld(dst, 1);                  // divide by 2 to get in unsigned range
1845       __ cvtdq2ps(dst, dst);             // convert hi exactly
1846       __ addps(dst, dst);                // double hi, exactly
1847       __ addps(dst, kScratchDoubleReg);  // add hi and lo, may round.
1848       break;
1849     }
1850     case kAVXF32x4UConvertI32x4: {
1851       CpuFeatureScope avx_scope(tasm(), AVX);
1852       XMMRegister dst = i.OutputSimd128Register();
1853       XMMRegister src = i.InputSimd128Register(0);
1854       __ vpxor(kScratchDoubleReg, kScratchDoubleReg,
1855                kScratchDoubleReg);  // zeros
1856       __ vpblendw(kScratchDoubleReg, kScratchDoubleReg, src,
1857                   0x55);                                   // get lo 16 bits
1858       __ vpsubd(dst, src, kScratchDoubleReg);              // get hi 16 bits
1859       __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
1860       __ vpsrld(dst, dst, 1);    // divide by 2 to get in unsigned range
1861       __ vcvtdq2ps(dst, dst);    // convert hi exactly
1862       __ vaddps(dst, dst, dst);  // double hi, exactly
1863       __ vaddps(dst, dst, kScratchDoubleReg);  // add hi and lo, may round.
1864       break;
1865     }
1866     case kSSEF32x4Abs: {
1867       XMMRegister dst = i.OutputSimd128Register();
1868       Operand src = i.InputOperand(0);
1869       if (src.is_reg(dst)) {
1870         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1871         __ psrld(kScratchDoubleReg, 1);
1872         __ andps(dst, kScratchDoubleReg);
1873       } else {
1874         __ pcmpeqd(dst, dst);
1875         __ psrld(dst, 1);
1876         __ andps(dst, src);
1877       }
1878       break;
1879     }
1880     case kAVXF32x4Abs: {
1881       CpuFeatureScope avx_scope(tasm(), AVX);
1882       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1883       __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);
1884       __ vandps(i.OutputSimd128Register(), kScratchDoubleReg,
1885                 i.InputOperand(0));
1886       break;
1887     }
1888     case kSSEF32x4Neg: {
1889       XMMRegister dst = i.OutputSimd128Register();
1890       Operand src = i.InputOperand(0);
1891       if (src.is_reg(dst)) {
1892         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1893         __ pslld(kScratchDoubleReg, 31);
1894         __ xorps(dst, kScratchDoubleReg);
1895       } else {
1896         __ pcmpeqd(dst, dst);
1897         __ pslld(dst, 31);
1898         __ xorps(dst, src);
1899       }
1900       break;
1901     }
1902     case kAVXF32x4Neg: {
1903       CpuFeatureScope avx_scope(tasm(), AVX);
1904       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1905       __ vpslld(kScratchDoubleReg, kScratchDoubleReg, 31);
1906       __ vxorps(i.OutputSimd128Register(), kScratchDoubleReg,
1907                 i.InputOperand(0));
1908       break;
1909     }
1910     case kIA32F32x4RecipApprox: {
1911       __ Rcpps(i.OutputSimd128Register(), i.InputOperand(0));
1912       break;
1913     }
1914     case kIA32F32x4RecipSqrtApprox: {
1915       __ Rsqrtps(i.OutputSimd128Register(), i.InputOperand(0));
1916       break;
1917     }
1918     case kSSEF32x4Add: {
1919       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
1920       __ addps(i.OutputSimd128Register(), i.InputOperand(1));
1921       break;
1922     }
1923     case kAVXF32x4Add: {
1924       CpuFeatureScope avx_scope(tasm(), AVX);
1925       __ vaddps(i.OutputSimd128Register(), i.InputSimd128Register(0),
1926                 i.InputOperand(1));
1927       break;
1928     }
1929     case kSSEF32x4AddHoriz: {
1930       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
1931       CpuFeatureScope sse_scope(tasm(), SSE3);
1932       __ haddps(i.OutputSimd128Register(), i.InputOperand(1));
1933       break;
1934     }
1935     case kAVXF32x4AddHoriz: {
1936       CpuFeatureScope avx_scope(tasm(), AVX);
1937       __ vhaddps(i.OutputSimd128Register(), i.InputSimd128Register(0),
1938                  i.InputOperand(1));
1939       break;
1940     }
1941     case kSSEF32x4Sub: {
1942       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
1943       __ subps(i.OutputSimd128Register(), i.InputOperand(1));
1944       break;
1945     }
1946     case kAVXF32x4Sub: {
1947       CpuFeatureScope avx_scope(tasm(), AVX);
1948       __ vsubps(i.OutputSimd128Register(), i.InputSimd128Register(0),
1949                 i.InputOperand(1));
1950       break;
1951     }
1952     case kSSEF32x4Mul: {
1953       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
1954       __ mulps(i.OutputSimd128Register(), i.InputOperand(1));
1955       break;
1956     }
1957     case kAVXF32x4Mul: {
1958       CpuFeatureScope avx_scope(tasm(), AVX);
1959       __ vmulps(i.OutputSimd128Register(), i.InputSimd128Register(0),
1960                 i.InputOperand(1));
1961       break;
1962     }
1963     case kSSEF32x4Min: {
1964       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
1965       __ minps(i.OutputSimd128Register(), i.InputOperand(1));
1966       break;
1967     }
1968     case kAVXF32x4Min: {
1969       CpuFeatureScope avx_scope(tasm(), AVX);
1970       __ vminps(i.OutputSimd128Register(), i.InputSimd128Register(0),
1971                 i.InputOperand(1));
1972       break;
1973     }
1974     case kSSEF32x4Max: {
1975       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
1976       __ maxps(i.OutputSimd128Register(), i.InputOperand(1));
1977       break;
1978     }
1979     case kAVXF32x4Max: {
1980       CpuFeatureScope avx_scope(tasm(), AVX);
1981       __ vmaxps(i.OutputSimd128Register(), i.InputSimd128Register(0),
1982                 i.InputOperand(1));
1983       break;
1984     }
1985     case kSSEF32x4Eq: {
1986       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
1987       __ cmpeqps(i.OutputSimd128Register(), i.InputOperand(1));
1988       break;
1989     }
1990     case kAVXF32x4Eq: {
1991       CpuFeatureScope avx_scope(tasm(), AVX);
1992       __ vcmpeqps(i.OutputSimd128Register(), i.InputSimd128Register(0),
1993                   i.InputOperand(1));
1994       break;
1995     }
1996     case kSSEF32x4Ne: {
1997       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
1998       __ cmpneqps(i.OutputSimd128Register(), i.InputOperand(1));
1999       break;
2000     }
2001     case kAVXF32x4Ne: {
2002       CpuFeatureScope avx_scope(tasm(), AVX);
2003       __ vcmpneqps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2004                    i.InputOperand(1));
2005       break;
2006     }
2007     case kSSEF32x4Lt: {
2008       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2009       __ cmpltps(i.OutputSimd128Register(), i.InputOperand(1));
2010       break;
2011     }
2012     case kAVXF32x4Lt: {
2013       CpuFeatureScope avx_scope(tasm(), AVX);
2014       __ vcmpltps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2015                   i.InputOperand(1));
2016       break;
2017     }
2018     case kSSEF32x4Le: {
2019       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2020       __ cmpleps(i.OutputSimd128Register(), i.InputOperand(1));
2021       break;
2022     }
2023     case kAVXF32x4Le: {
2024       CpuFeatureScope avx_scope(tasm(), AVX);
2025       __ vcmpleps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2026                   i.InputOperand(1));
2027       break;
2028     }
2029     case kIA32I32x4Splat: {
2030       XMMRegister dst = i.OutputSimd128Register();
2031       __ Movd(dst, i.InputOperand(0));
2032       __ Pshufd(dst, dst, 0x0);
2033       break;
2034     }
2035     case kIA32I32x4ExtractLane: {
2036       __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
2037       break;
2038     }
2039     case kSSEI32x4ReplaceLane: {
2040       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2041       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2042       __ pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2043       break;
2044     }
2045     case kAVXI32x4ReplaceLane: {
2046       CpuFeatureScope avx_scope(tasm(), AVX);
2047       __ vpinsrd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2048                  i.InputOperand(2), i.InputInt8(1));
2049       break;
2050     }
2051     case kSSEI32x4SConvertF32x4: {
2052       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2053       XMMRegister dst = i.OutputSimd128Register();
2054       // NAN->0
2055       __ movaps(kScratchDoubleReg, dst);
2056       __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
2057       __ pand(dst, kScratchDoubleReg);
2058       // Set top bit if >= 0 (but not -0.0!)
2059       __ pxor(kScratchDoubleReg, dst);
2060       // Convert
2061       __ cvttps2dq(dst, dst);
2062       // Set top bit if >=0 is now < 0
2063       __ pand(kScratchDoubleReg, dst);
2064       __ psrad(kScratchDoubleReg, 31);
2065       // Set positive overflow lanes to 0x7FFFFFFF
2066       __ pxor(dst, kScratchDoubleReg);
2067       break;
2068     }
2069     case kAVXI32x4SConvertF32x4: {
2070       CpuFeatureScope avx_scope(tasm(), AVX);
2071       XMMRegister dst = i.OutputSimd128Register();
2072       XMMRegister src = i.InputSimd128Register(0);
2073       // NAN->0
2074       __ vcmpeqps(kScratchDoubleReg, src, src);
2075       __ vpand(dst, src, kScratchDoubleReg);
2076       // Set top bit if >= 0 (but not -0.0!)
2077       __ vpxor(kScratchDoubleReg, kScratchDoubleReg, dst);
2078       // Convert
2079       __ vcvttps2dq(dst, dst);
2080       // Set top bit if >=0 is now < 0
2081       __ vpand(kScratchDoubleReg, kScratchDoubleReg, dst);
2082       __ vpsrad(kScratchDoubleReg, kScratchDoubleReg, 31);
2083       // Set positive overflow lanes to 0x7FFFFFFF
2084       __ vpxor(dst, dst, kScratchDoubleReg);
2085       break;
2086     }
2087     case kIA32I32x4SConvertI16x8Low: {
2088       __ Pmovsxwd(i.OutputSimd128Register(), i.InputOperand(0));
2089       break;
2090     }
2091     case kIA32I32x4SConvertI16x8High: {
2092       XMMRegister dst = i.OutputSimd128Register();
2093       __ Palignr(dst, i.InputOperand(0), 8);
2094       __ Pmovsxwd(dst, dst);
2095       break;
2096     }
2097     case kIA32I32x4Neg: {
2098       XMMRegister dst = i.OutputSimd128Register();
2099       Operand src = i.InputOperand(0);
2100       if (src.is_reg(dst)) {
2101         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2102         __ Psignd(dst, kScratchDoubleReg);
2103       } else {
2104         __ Pxor(dst, dst);
2105         __ Psubd(dst, src);
2106       }
2107       break;
2108     }
2109     case kSSEI32x4Shl: {
2110       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2111       __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
2112       break;
2113     }
2114     case kAVXI32x4Shl: {
2115       CpuFeatureScope avx_scope(tasm(), AVX);
2116       __ vpslld(i.OutputSimd128Register(), i.InputSimd128Register(0),
2117                 i.InputInt8(1));
2118       break;
2119     }
2120     case kSSEI32x4ShrS: {
2121       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2122       __ psrad(i.OutputSimd128Register(), i.InputInt8(1));
2123       break;
2124     }
2125     case kAVXI32x4ShrS: {
2126       CpuFeatureScope avx_scope(tasm(), AVX);
2127       __ vpsrad(i.OutputSimd128Register(), i.InputSimd128Register(0),
2128                 i.InputInt8(1));
2129       break;
2130     }
2131     case kSSEI32x4Add: {
2132       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2133       __ paddd(i.OutputSimd128Register(), i.InputOperand(1));
2134       break;
2135     }
2136     case kAVXI32x4Add: {
2137       CpuFeatureScope avx_scope(tasm(), AVX);
2138       __ vpaddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2139                 i.InputOperand(1));
2140       break;
2141     }
2142     case kSSEI32x4AddHoriz: {
2143       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2144       CpuFeatureScope sse_scope(tasm(), SSSE3);
2145       __ phaddd(i.OutputSimd128Register(), i.InputOperand(1));
2146       break;
2147     }
2148     case kAVXI32x4AddHoriz: {
2149       CpuFeatureScope avx_scope(tasm(), AVX);
2150       __ vphaddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2151                  i.InputOperand(1));
2152       break;
2153     }
2154     case kSSEI32x4Sub: {
2155       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2156       __ psubd(i.OutputSimd128Register(), i.InputOperand(1));
2157       break;
2158     }
2159     case kAVXI32x4Sub: {
2160       CpuFeatureScope avx_scope(tasm(), AVX);
2161       __ vpsubd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2162                 i.InputOperand(1));
2163       break;
2164     }
2165     case kSSEI32x4Mul: {
2166       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2167       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2168       __ pmulld(i.OutputSimd128Register(), i.InputOperand(1));
2169       break;
2170     }
2171     case kAVXI32x4Mul: {
2172       CpuFeatureScope avx_scope(tasm(), AVX);
2173       __ vpmulld(i.OutputSimd128Register(), i.InputSimd128Register(0),
2174                  i.InputOperand(1));
2175       break;
2176     }
2177     case kSSEI32x4MinS: {
2178       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2179       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2180       __ pminsd(i.OutputSimd128Register(), i.InputOperand(1));
2181       break;
2182     }
2183     case kAVXI32x4MinS: {
2184       CpuFeatureScope avx_scope(tasm(), AVX);
2185       __ vpminsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2186                  i.InputOperand(1));
2187       break;
2188     }
2189     case kSSEI32x4MaxS: {
2190       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2191       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2192       __ pmaxsd(i.OutputSimd128Register(), i.InputOperand(1));
2193       break;
2194     }
2195     case kAVXI32x4MaxS: {
2196       CpuFeatureScope avx_scope(tasm(), AVX);
2197       __ vpmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2198                  i.InputOperand(1));
2199       break;
2200     }
2201     case kSSEI32x4Eq: {
2202       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2203       __ pcmpeqd(i.OutputSimd128Register(), i.InputOperand(1));
2204       break;
2205     }
2206     case kAVXI32x4Eq: {
2207       CpuFeatureScope avx_scope(tasm(), AVX);
2208       __ vpcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2209                   i.InputOperand(1));
2210       break;
2211     }
2212     case kSSEI32x4Ne: {
2213       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2214       __ pcmpeqd(i.OutputSimd128Register(), i.InputOperand(1));
2215       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2216       __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2217       break;
2218     }
2219     case kAVXI32x4Ne: {
2220       CpuFeatureScope avx_scope(tasm(), AVX);
2221       __ vpcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2222                   i.InputOperand(1));
2223       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2224       __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
2225                kScratchDoubleReg);
2226       break;
2227     }
2228     case kSSEI32x4GtS: {
2229       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2230       __ pcmpgtd(i.OutputSimd128Register(), i.InputOperand(1));
2231       break;
2232     }
2233     case kAVXI32x4GtS: {
2234       CpuFeatureScope avx_scope(tasm(), AVX);
2235       __ vpcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2236                   i.InputOperand(1));
2237       break;
2238     }
2239     case kSSEI32x4GeS: {
2240       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2241       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2242       XMMRegister dst = i.OutputSimd128Register();
2243       Operand src = i.InputOperand(1);
2244       __ pminsd(dst, src);
2245       __ pcmpeqd(dst, src);
2246       break;
2247     }
2248     case kAVXI32x4GeS: {
2249       CpuFeatureScope avx_scope(tasm(), AVX);
2250       XMMRegister src1 = i.InputSimd128Register(0);
2251       Operand src2 = i.InputOperand(1);
2252       __ vpminsd(kScratchDoubleReg, src1, src2);
2253       __ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
2254       break;
2255     }
2256     case kSSEI32x4UConvertF32x4: {
2257       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2258       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2259       XMMRegister dst = i.OutputSimd128Register();
2260       XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
2261       // NAN->0, negative->0
2262       __ pxor(kScratchDoubleReg, kScratchDoubleReg);
2263       __ maxps(dst, kScratchDoubleReg);
2264       // scratch: float representation of max_signed
2265       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2266       __ psrld(kScratchDoubleReg, 1);                     // 0x7fffffff
2267       __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
2268       // tmp: convert (src-max_signed).
2269       // Positive overflow lanes -> 0x7FFFFFFF
2270       // Negative lanes -> 0
2271       __ movaps(tmp, dst);
2272       __ subps(tmp, kScratchDoubleReg);
2273       __ cmpleps(kScratchDoubleReg, tmp);
2274       __ cvttps2dq(tmp, tmp);
2275       __ pxor(tmp, kScratchDoubleReg);
2276       __ pxor(kScratchDoubleReg, kScratchDoubleReg);
2277       __ pmaxsd(tmp, kScratchDoubleReg);
2278       // convert. Overflow lanes above max_signed will be 0x80000000
2279       __ cvttps2dq(dst, dst);
2280       // Add (src-max_signed) for overflow lanes.
2281       __ paddd(dst, tmp);
2282       break;
2283     }
2284     case kAVXI32x4UConvertF32x4: {
2285       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2286       CpuFeatureScope avx_scope(tasm(), AVX);
2287       XMMRegister dst = i.OutputSimd128Register();
2288       XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
2289       // NAN->0, negative->0
2290       __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2291       __ vmaxps(dst, dst, kScratchDoubleReg);
2292       // scratch: float representation of max_signed
2293       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2294       __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);  // 0x7fffffff
2295       __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
2296       // tmp: convert (src-max_signed).
2297       // Positive overflow lanes -> 0x7FFFFFFF
2298       // Negative lanes -> 0
2299       __ vsubps(tmp, dst, kScratchDoubleReg);
2300       __ vcmpleps(kScratchDoubleReg, kScratchDoubleReg, tmp);
2301       __ vcvttps2dq(tmp, tmp);
2302       __ vpxor(tmp, tmp, kScratchDoubleReg);
2303       __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2304       __ vpmaxsd(tmp, tmp, kScratchDoubleReg);
2305       // convert. Overflow lanes above max_signed will be 0x80000000
2306       __ vcvttps2dq(dst, dst);
2307       // Add (src-max_signed) for overflow lanes.
2308       __ vpaddd(dst, dst, tmp);
2309       break;
2310     }
2311     case kIA32I32x4UConvertI16x8Low: {
2312       __ Pmovzxwd(i.OutputSimd128Register(), i.InputOperand(0));
2313       break;
2314     }
2315     case kIA32I32x4UConvertI16x8High: {
2316       XMMRegister dst = i.OutputSimd128Register();
2317       __ Palignr(dst, i.InputOperand(0), 8);
2318       __ Pmovzxwd(dst, dst);
2319       break;
2320     }
2321     case kSSEI32x4ShrU: {
2322       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2323       __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
2324       break;
2325     }
2326     case kAVXI32x4ShrU: {
2327       CpuFeatureScope avx_scope(tasm(), AVX);
2328       __ vpsrld(i.OutputSimd128Register(), i.InputSimd128Register(0),
2329                 i.InputInt8(1));
2330       break;
2331     }
2332     case kSSEI32x4MinU: {
2333       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2334       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2335       __ pminud(i.OutputSimd128Register(), i.InputOperand(1));
2336       break;
2337     }
2338     case kAVXI32x4MinU: {
2339       CpuFeatureScope avx_scope(tasm(), AVX);
2340       __ vpminud(i.OutputSimd128Register(), i.InputSimd128Register(0),
2341                  i.InputOperand(1));
2342       break;
2343     }
2344     case kSSEI32x4MaxU: {
2345       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2346       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2347       __ pmaxud(i.OutputSimd128Register(), i.InputOperand(1));
2348       break;
2349     }
2350     case kAVXI32x4MaxU: {
2351       CpuFeatureScope avx_scope(tasm(), AVX);
2352       __ vpmaxud(i.OutputSimd128Register(), i.InputSimd128Register(0),
2353                  i.InputOperand(1));
2354       break;
2355     }
2356     case kSSEI32x4GtU: {
2357       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2358       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2359       XMMRegister dst = i.OutputSimd128Register();
2360       Operand src = i.InputOperand(1);
2361       __ pmaxud(dst, src);
2362       __ pcmpeqd(dst, src);
2363       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2364       __ pxor(dst, kScratchDoubleReg);
2365       break;
2366     }
2367     case kAVXI32x4GtU: {
2368       CpuFeatureScope avx_scope(tasm(), AVX);
2369       XMMRegister dst = i.OutputSimd128Register();
2370       XMMRegister src1 = i.InputSimd128Register(0);
2371       Operand src2 = i.InputOperand(1);
2372       __ vpmaxud(kScratchDoubleReg, src1, src2);
2373       __ vpcmpeqd(dst, kScratchDoubleReg, src2);
2374       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2375       __ vpxor(dst, dst, kScratchDoubleReg);
2376       break;
2377     }
2378     case kSSEI32x4GeU: {
2379       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2380       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2381       XMMRegister dst = i.OutputSimd128Register();
2382       Operand src = i.InputOperand(1);
2383       __ pminud(dst, src);
2384       __ pcmpeqd(dst, src);
2385       break;
2386     }
2387     case kAVXI32x4GeU: {
2388       CpuFeatureScope avx_scope(tasm(), AVX);
2389       XMMRegister src1 = i.InputSimd128Register(0);
2390       Operand src2 = i.InputOperand(1);
2391       __ vpminud(kScratchDoubleReg, src1, src2);
2392       __ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
2393       break;
2394     }
2395     case kIA32I16x8Splat: {
2396       XMMRegister dst = i.OutputSimd128Register();
2397       __ Movd(dst, i.InputOperand(0));
2398       __ Pshuflw(dst, dst, 0x0);
2399       __ Pshufd(dst, dst, 0x0);
2400       break;
2401     }
2402     case kIA32I16x8ExtractLane: {
2403       Register dst = i.OutputRegister();
2404       __ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
2405       __ movsx_w(dst, dst);
2406       break;
2407     }
2408     case kSSEI16x8ReplaceLane: {
2409       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2410       __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2411       break;
2412     }
2413     case kAVXI16x8ReplaceLane: {
2414       CpuFeatureScope avx_scope(tasm(), AVX);
2415       __ vpinsrw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2416                  i.InputOperand(2), i.InputInt8(1));
2417       break;
2418     }
2419     case kIA32I16x8SConvertI8x16Low: {
2420       __ Pmovsxbw(i.OutputSimd128Register(), i.InputOperand(0));
2421       break;
2422     }
2423     case kIA32I16x8SConvertI8x16High: {
2424       XMMRegister dst = i.OutputSimd128Register();
2425       __ Palignr(dst, i.InputOperand(0), 8);
2426       __ Pmovsxbw(dst, dst);
2427       break;
2428     }
2429     case kIA32I16x8Neg: {
2430       XMMRegister dst = i.OutputSimd128Register();
2431       Operand src = i.InputOperand(0);
2432       if (src.is_reg(dst)) {
2433         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2434         __ Psignw(dst, kScratchDoubleReg);
2435       } else {
2436         __ Pxor(dst, dst);
2437         __ Psubw(dst, src);
2438       }
2439       break;
2440     }
2441     case kSSEI16x8Shl: {
2442       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2443       __ psllw(i.OutputSimd128Register(), i.InputInt8(1));
2444       break;
2445     }
2446     case kAVXI16x8Shl: {
2447       CpuFeatureScope avx_scope(tasm(), AVX);
2448       __ vpsllw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2449                 i.InputInt8(1));
2450       break;
2451     }
2452     case kSSEI16x8ShrS: {
2453       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2454       __ psraw(i.OutputSimd128Register(), i.InputInt8(1));
2455       break;
2456     }
2457     case kAVXI16x8ShrS: {
2458       CpuFeatureScope avx_scope(tasm(), AVX);
2459       __ vpsraw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2460                 i.InputInt8(1));
2461       break;
2462     }
2463     case kSSEI16x8SConvertI32x4: {
2464       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2465       __ packssdw(i.OutputSimd128Register(), i.InputOperand(1));
2466       break;
2467     }
2468     case kAVXI16x8SConvertI32x4: {
2469       CpuFeatureScope avx_scope(tasm(), AVX);
2470       __ vpackssdw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2471                    i.InputOperand(1));
2472       break;
2473     }
2474     case kSSEI16x8Add: {
2475       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2476       __ paddw(i.OutputSimd128Register(), i.InputOperand(1));
2477       break;
2478     }
2479     case kAVXI16x8Add: {
2480       CpuFeatureScope avx_scope(tasm(), AVX);
2481       __ vpaddw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2482                 i.InputOperand(1));
2483       break;
2484     }
2485     case kSSEI16x8AddSaturateS: {
2486       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2487       __ paddsw(i.OutputSimd128Register(), i.InputOperand(1));
2488       break;
2489     }
2490     case kAVXI16x8AddSaturateS: {
2491       CpuFeatureScope avx_scope(tasm(), AVX);
2492       __ vpaddsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2493                  i.InputOperand(1));
2494       break;
2495     }
2496     case kSSEI16x8AddHoriz: {
2497       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2498       CpuFeatureScope sse_scope(tasm(), SSSE3);
2499       __ phaddw(i.OutputSimd128Register(), i.InputOperand(1));
2500       break;
2501     }
2502     case kAVXI16x8AddHoriz: {
2503       CpuFeatureScope avx_scope(tasm(), AVX);
2504       __ vphaddw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2505                  i.InputOperand(1));
2506       break;
2507     }
2508     case kSSEI16x8Sub: {
2509       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2510       __ psubw(i.OutputSimd128Register(), i.InputOperand(1));
2511       break;
2512     }
2513     case kAVXI16x8Sub: {
2514       CpuFeatureScope avx_scope(tasm(), AVX);
2515       __ vpsubw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2516                 i.InputOperand(1));
2517       break;
2518     }
2519     case kSSEI16x8SubSaturateS: {
2520       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2521       __ psubsw(i.OutputSimd128Register(), i.InputOperand(1));
2522       break;
2523     }
2524     case kAVXI16x8SubSaturateS: {
2525       CpuFeatureScope avx_scope(tasm(), AVX);
2526       __ vpsubsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2527                  i.InputOperand(1));
2528       break;
2529     }
2530     case kSSEI16x8Mul: {
2531       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2532       __ pmullw(i.OutputSimd128Register(), i.InputOperand(1));
2533       break;
2534     }
2535     case kAVXI16x8Mul: {
2536       CpuFeatureScope avx_scope(tasm(), AVX);
2537       __ vpmullw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2538                  i.InputOperand(1));
2539       break;
2540     }
2541     case kSSEI16x8MinS: {
2542       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2543       __ pminsw(i.OutputSimd128Register(), i.InputOperand(1));
2544       break;
2545     }
2546     case kAVXI16x8MinS: {
2547       CpuFeatureScope avx_scope(tasm(), AVX);
2548       __ vpminsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2549                  i.InputOperand(1));
2550       break;
2551     }
2552     case kSSEI16x8MaxS: {
2553       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2554       __ pmaxsw(i.OutputSimd128Register(), i.InputOperand(1));
2555       break;
2556     }
2557     case kAVXI16x8MaxS: {
2558       CpuFeatureScope avx_scope(tasm(), AVX);
2559       __ vpmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2560                  i.InputOperand(1));
2561       break;
2562     }
2563     case kSSEI16x8Eq: {
2564       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2565       __ pcmpeqw(i.OutputSimd128Register(), i.InputOperand(1));
2566       break;
2567     }
2568     case kAVXI16x8Eq: {
2569       CpuFeatureScope avx_scope(tasm(), AVX);
2570       __ vpcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2571                   i.InputOperand(1));
2572       break;
2573     }
2574     case kSSEI16x8Ne: {
2575       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2576       __ pcmpeqw(i.OutputSimd128Register(), i.InputOperand(1));
2577       __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2578       __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2579       break;
2580     }
2581     case kAVXI16x8Ne: {
2582       CpuFeatureScope avx_scope(tasm(), AVX);
2583       __ vpcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2584                   i.InputOperand(1));
2585       __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2586       __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
2587                kScratchDoubleReg);
2588       break;
2589     }
2590     case kSSEI16x8GtS: {
2591       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2592       __ pcmpgtw(i.OutputSimd128Register(), i.InputOperand(1));
2593       break;
2594     }
2595     case kAVXI16x8GtS: {
2596       CpuFeatureScope avx_scope(tasm(), AVX);
2597       __ vpcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2598                   i.InputOperand(1));
2599       break;
2600     }
2601     case kSSEI16x8GeS: {
2602       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2603       XMMRegister dst = i.OutputSimd128Register();
2604       Operand src = i.InputOperand(1);
2605       __ pminsw(dst, src);
2606       __ pcmpeqw(dst, src);
2607       break;
2608     }
2609     case kAVXI16x8GeS: {
2610       CpuFeatureScope avx_scope(tasm(), AVX);
2611       XMMRegister src1 = i.InputSimd128Register(0);
2612       Operand src2 = i.InputOperand(1);
2613       __ vpminsw(kScratchDoubleReg, src1, src2);
2614       __ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2);
2615       break;
2616     }
2617     case kIA32I16x8UConvertI8x16Low: {
2618       __ Pmovzxbw(i.OutputSimd128Register(), i.InputOperand(0));
2619       break;
2620     }
2621     case kIA32I16x8UConvertI8x16High: {
2622       XMMRegister dst = i.OutputSimd128Register();
2623       __ Palignr(dst, i.InputOperand(0), 8);
2624       __ Pmovzxbw(dst, dst);
2625       break;
2626     }
2627     case kSSEI16x8ShrU: {
2628       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2629       __ psrlw(i.OutputSimd128Register(), i.InputInt8(1));
2630       break;
2631     }
2632     case kAVXI16x8ShrU: {
2633       CpuFeatureScope avx_scope(tasm(), AVX);
2634       __ vpsrlw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2635                 i.InputInt8(1));
2636       break;
2637     }
2638     case kSSEI16x8UConvertI32x4: {
2639       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2640       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2641       XMMRegister dst = i.OutputSimd128Register();
2642       // Change negative lanes to 0x7FFFFFFF
2643       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2644       __ psrld(kScratchDoubleReg, 1);
2645       __ pminud(dst, kScratchDoubleReg);
2646       __ pminud(kScratchDoubleReg, i.InputOperand(1));
2647       __ packusdw(dst, kScratchDoubleReg);
2648       break;
2649     }
2650     case kAVXI16x8UConvertI32x4: {
2651       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2652       CpuFeatureScope avx_scope(tasm(), AVX);
2653       XMMRegister dst = i.OutputSimd128Register();
2654       // Change negative lanes to 0x7FFFFFFF
2655       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2656       __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);
2657       __ vpminud(dst, kScratchDoubleReg, i.InputSimd128Register(0));
2658       __ vpminud(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1));
2659       __ vpackusdw(dst, dst, kScratchDoubleReg);
2660       break;
2661     }
2662     case kSSEI16x8AddSaturateU: {
2663       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2664       __ paddusw(i.OutputSimd128Register(), i.InputOperand(1));
2665       break;
2666     }
2667     case kAVXI16x8AddSaturateU: {
2668       CpuFeatureScope avx_scope(tasm(), AVX);
2669       __ vpaddusw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2670                   i.InputOperand(1));
2671       break;
2672     }
2673     case kSSEI16x8SubSaturateU: {
2674       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2675       __ psubusw(i.OutputSimd128Register(), i.InputOperand(1));
2676       break;
2677     }
2678     case kAVXI16x8SubSaturateU: {
2679       CpuFeatureScope avx_scope(tasm(), AVX);
2680       __ vpsubusw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2681                   i.InputOperand(1));
2682       break;
2683     }
2684     case kSSEI16x8MinU: {
2685       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2686       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2687       __ pminuw(i.OutputSimd128Register(), i.InputOperand(1));
2688       break;
2689     }
2690     case kAVXI16x8MinU: {
2691       CpuFeatureScope avx_scope(tasm(), AVX);
2692       __ vpminuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2693                  i.InputOperand(1));
2694       break;
2695     }
2696     case kSSEI16x8MaxU: {
2697       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2698       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2699       __ pmaxuw(i.OutputSimd128Register(), i.InputOperand(1));
2700       break;
2701     }
2702     case kAVXI16x8MaxU: {
2703       CpuFeatureScope avx_scope(tasm(), AVX);
2704       __ vpmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2705                  i.InputOperand(1));
2706       break;
2707     }
2708     case kSSEI16x8GtU: {
2709       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2710       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2711       XMMRegister dst = i.OutputSimd128Register();
2712       Operand src = i.InputOperand(1);
2713       __ pmaxuw(dst, src);
2714       __ pcmpeqw(dst, src);
2715       __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2716       __ pxor(dst, kScratchDoubleReg);
2717       break;
2718     }
2719     case kAVXI16x8GtU: {
2720       CpuFeatureScope avx_scope(tasm(), AVX);
2721       XMMRegister dst = i.OutputSimd128Register();
2722       XMMRegister src1 = i.InputSimd128Register(0);
2723       Operand src2 = i.InputOperand(1);
2724       __ vpmaxuw(kScratchDoubleReg, src1, src2);
2725       __ vpcmpeqw(dst, kScratchDoubleReg, src2);
2726       __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2727       __ vpxor(dst, dst, kScratchDoubleReg);
2728       break;
2729     }
2730     case kSSEI16x8GeU: {
2731       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2732       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2733       XMMRegister dst = i.OutputSimd128Register();
2734       Operand src = i.InputOperand(1);
2735       __ pminuw(dst, src);
2736       __ pcmpeqw(dst, src);
2737       break;
2738     }
2739     case kAVXI16x8GeU: {
2740       CpuFeatureScope avx_scope(tasm(), AVX);
2741       XMMRegister src1 = i.InputSimd128Register(0);
2742       Operand src2 = i.InputOperand(1);
2743       __ vpminuw(kScratchDoubleReg, src1, src2);
2744       __ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2);
2745       break;
2746     }
2747     case kIA32I8x16Splat: {
2748       XMMRegister dst = i.OutputSimd128Register();
2749       __ Movd(dst, i.InputOperand(0));
2750       __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
2751       __ Pshufb(dst, kScratchDoubleReg);
2752       break;
2753     }
2754     case kIA32I8x16ExtractLane: {
2755       Register dst = i.OutputRegister();
2756       __ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
2757       __ movsx_b(dst, dst);
2758       break;
2759     }
2760     case kSSEI8x16ReplaceLane: {
2761       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2762       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2763       __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2764       break;
2765     }
2766     case kAVXI8x16ReplaceLane: {
2767       CpuFeatureScope avx_scope(tasm(), AVX);
2768       __ vpinsrb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2769                  i.InputOperand(2), i.InputInt8(1));
2770       break;
2771     }
2772     case kSSEI8x16SConvertI16x8: {
2773       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2774       __ packsswb(i.OutputSimd128Register(), i.InputOperand(1));
2775       break;
2776     }
2777     case kAVXI8x16SConvertI16x8: {
2778       CpuFeatureScope avx_scope(tasm(), AVX);
2779       __ vpacksswb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2780                    i.InputOperand(1));
2781       break;
2782     }
2783     case kIA32I8x16Neg: {
2784       XMMRegister dst = i.OutputSimd128Register();
2785       Operand src = i.InputOperand(0);
2786       if (src.is_reg(dst)) {
2787         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2788         __ Psignb(dst, kScratchDoubleReg);
2789       } else {
2790         __ Pxor(dst, dst);
2791         __ Psubb(dst, src);
2792       }
2793       break;
2794     }
2795     case kSSEI8x16Shl: {
2796       XMMRegister dst = i.OutputSimd128Register();
2797       DCHECK_EQ(dst, i.InputSimd128Register(0));
2798       int8_t shift = i.InputInt8(1) & 0x7;
2799       if (shift < 4) {
2800         // For small shifts, doubling is faster.
2801         for (int i = 0; i < shift; ++i) {
2802           __ paddb(dst, dst);
2803         }
2804       } else {
2805         // Mask off the unwanted bits before word-shifting.
2806         __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2807         __ psrlw(kScratchDoubleReg, 8 + shift);
2808         __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
2809         __ pand(dst, kScratchDoubleReg);
2810         __ psllw(dst, shift);
2811       }
2812       break;
2813     }
2814     case kAVXI8x16Shl: {
2815       CpuFeatureScope avx_scope(tasm(), AVX);
2816       XMMRegister dst = i.OutputSimd128Register();
2817       XMMRegister src = i.InputSimd128Register(0);
2818       int8_t shift = i.InputInt8(1) & 0x7;
2819       if (shift < 4) {
2820         // For small shifts, doubling is faster.
2821         for (int i = 0; i < shift; ++i) {
2822           __ vpaddb(dst, src, src);
2823           src = dst;
2824         }
2825       } else {
2826         // Mask off the unwanted bits before word-shifting.
2827         __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2828         __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 8 + shift);
2829         __ vpackuswb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2830         __ vpand(dst, src, kScratchDoubleReg);
2831         __ vpsllw(dst, dst, shift);
2832       }
2833       break;
2834     }
2835     case kIA32I8x16ShrS: {
2836       XMMRegister dst = i.OutputSimd128Register();
2837       XMMRegister src = i.InputSimd128Register(0);
2838       int8_t shift = i.InputInt8(1) & 0x7;
2839       // Unpack the bytes into words, do arithmetic shifts, and repack.
2840       __ Punpckhbw(kScratchDoubleReg, src);
2841       __ Punpcklbw(dst, src);
2842       __ Psraw(kScratchDoubleReg, 8 + shift);
2843       __ Psraw(dst, 8 + shift);
2844       __ Packsswb(dst, kScratchDoubleReg);
2845       break;
2846     }
2847     case kSSEI8x16Add: {
2848       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2849       __ paddb(i.OutputSimd128Register(), i.InputOperand(1));
2850       break;
2851     }
2852     case kAVXI8x16Add: {
2853       CpuFeatureScope avx_scope(tasm(), AVX);
2854       __ vpaddb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2855                 i.InputOperand(1));
2856       break;
2857     }
2858     case kSSEI8x16AddSaturateS: {
2859       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2860       __ paddsb(i.OutputSimd128Register(), i.InputOperand(1));
2861       break;
2862     }
2863     case kAVXI8x16AddSaturateS: {
2864       CpuFeatureScope avx_scope(tasm(), AVX);
2865       __ vpaddsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2866                  i.InputOperand(1));
2867       break;
2868     }
2869     case kSSEI8x16Sub: {
2870       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2871       __ psubb(i.OutputSimd128Register(), i.InputOperand(1));
2872       break;
2873     }
2874     case kAVXI8x16Sub: {
2875       CpuFeatureScope avx_scope(tasm(), AVX);
2876       __ vpsubb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2877                 i.InputOperand(1));
2878       break;
2879     }
2880     case kSSEI8x16SubSaturateS: {
2881       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2882       __ psubsb(i.OutputSimd128Register(), i.InputOperand(1));
2883       break;
2884     }
2885     case kAVXI8x16SubSaturateS: {
2886       CpuFeatureScope avx_scope(tasm(), AVX);
2887       __ vpsubsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2888                  i.InputOperand(1));
2889       break;
2890     }
2891     case kSSEI8x16Mul: {
2892       XMMRegister dst = i.OutputSimd128Register();
2893       DCHECK_EQ(dst, i.InputSimd128Register(0));
2894       XMMRegister right = i.InputSimd128Register(1);
2895       XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
2896 
2897       // I16x8 view of I8x16
2898       // left = AAaa AAaa ... AAaa AAaa
2899       // right= BBbb BBbb ... BBbb BBbb
2900 
2901       // t = 00AA 00AA ... 00AA 00AA
2902       // s = 00BB 00BB ... 00BB 00BB
2903       __ movaps(tmp, dst);
2904       __ movaps(kScratchDoubleReg, right);
2905       __ psrlw(tmp, 8);
2906       __ psrlw(kScratchDoubleReg, 8);
2907       // dst = left * 256
2908       __ psllw(dst, 8);
2909 
2910       // t = I16x8Mul(t, s)
2911       //    => __PP __PP ...  __PP  __PP
2912       __ pmullw(tmp, kScratchDoubleReg);
2913       // dst = I16x8Mul(left * 256, right)
2914       //    => pp__ pp__ ...  pp__  pp__
2915       __ pmullw(dst, right);
2916 
2917       // t = I16x8Shl(t, 8)
2918       //    => PP00 PP00 ...  PP00  PP00
2919       __ psllw(tmp, 8);
2920 
2921       // dst = I16x8Shr(dst, 8)
2922       //    => 00pp 00pp ...  00pp  00pp
2923       __ psrlw(dst, 8);
2924 
2925       // dst = I16x8Or(dst, t)
2926       //    => PPpp PPpp ...  PPpp  PPpp
2927       __ por(dst, tmp);
2928       break;
2929     }
2930     case kAVXI8x16Mul: {
2931       CpuFeatureScope avx_scope(tasm(), AVX);
2932       XMMRegister dst = i.OutputSimd128Register();
2933       XMMRegister left = i.InputSimd128Register(0);
2934       XMMRegister right = i.InputSimd128Register(1);
2935       XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
2936 
2937       // I16x8 view of I8x16
2938       // left = AAaa AAaa ... AAaa AAaa
2939       // right= BBbb BBbb ... BBbb BBbb
2940 
2941       // t = 00AA 00AA ... 00AA 00AA
2942       // s = 00BB 00BB ... 00BB 00BB
2943       __ vpsrlw(tmp, left, 8);
2944       __ vpsrlw(kScratchDoubleReg, right, 8);
2945 
2946       // t = I16x8Mul(t0, t1)
2947       //    => __PP __PP ...  __PP  __PP
2948       __ vpmullw(tmp, tmp, kScratchDoubleReg);
2949 
2950       // s = left * 256
2951       __ vpsllw(kScratchDoubleReg, left, 8);
2952 
2953       // dst = I16x8Mul(left * 256, right)
2954       //    => pp__ pp__ ...  pp__  pp__
2955       __ vpmullw(dst, kScratchDoubleReg, right);
2956 
2957       // dst = I16x8Shr(dst, 8)
2958       //    => 00pp 00pp ...  00pp  00pp
2959       __ vpsrlw(dst, dst, 8);
2960 
2961       // t = I16x8Shl(t, 8)
2962       //    => PP00 PP00 ...  PP00  PP00
2963       __ vpsllw(tmp, tmp, 8);
2964 
2965       // dst = I16x8Or(dst, t)
2966       //    => PPpp PPpp ...  PPpp  PPpp
2967       __ vpor(dst, dst, tmp);
2968       break;
2969     }
2970     case kSSEI8x16MinS: {
2971       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2972       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2973       __ pminsb(i.OutputSimd128Register(), i.InputOperand(1));
2974       break;
2975     }
2976     case kAVXI8x16MinS: {
2977       CpuFeatureScope avx_scope(tasm(), AVX);
2978       __ vpminsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2979                  i.InputOperand(1));
2980       break;
2981     }
2982     case kSSEI8x16MaxS: {
2983       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2984       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2985       __ pmaxsb(i.OutputSimd128Register(), i.InputOperand(1));
2986       break;
2987     }
2988     case kAVXI8x16MaxS: {
2989       CpuFeatureScope avx_scope(tasm(), AVX);
2990       __ vpmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2991                  i.InputOperand(1));
2992       break;
2993     }
2994     case kSSEI8x16Eq: {
2995       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2996       __ pcmpeqb(i.OutputSimd128Register(), i.InputOperand(1));
2997       break;
2998     }
2999     case kAVXI8x16Eq: {
3000       CpuFeatureScope avx_scope(tasm(), AVX);
3001       __ vpcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3002                   i.InputOperand(1));
3003       break;
3004     }
3005     case kSSEI8x16Ne: {
3006       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3007       __ pcmpeqb(i.OutputSimd128Register(), i.InputOperand(1));
3008       __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
3009       __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
3010       break;
3011     }
3012     case kAVXI8x16Ne: {
3013       CpuFeatureScope avx_scope(tasm(), AVX);
3014       __ vpcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3015                   i.InputOperand(1));
3016       __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3017       __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
3018                kScratchDoubleReg);
3019       break;
3020     }
3021     case kSSEI8x16GtS: {
3022       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3023       __ pcmpgtb(i.OutputSimd128Register(), i.InputOperand(1));
3024       break;
3025     }
3026     case kAVXI8x16GtS: {
3027       CpuFeatureScope avx_scope(tasm(), AVX);
3028       __ vpcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3029                   i.InputOperand(1));
3030       break;
3031     }
    case kSSEI8x16GeS: {
      // Signed byte-wise "dst >= src", computed as min(dst, src) == src.
      // The SSE form is destructive, so the output must alias input 0.
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      // dst = signed min(dst, src) per byte.
      __ pminsb(dst, src);
      // The minimum equals src exactly in the lanes where dst was >= src.
      __ pcmpeqb(dst, src);
      break;
    }
3041     case kAVXI8x16GeS: {
3042       CpuFeatureScope avx_scope(tasm(), AVX);
3043       XMMRegister src1 = i.InputSimd128Register(0);
3044       Operand src2 = i.InputOperand(1);
3045       __ vpminsb(kScratchDoubleReg, src1, src2);
3046       __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2);
3047       break;
3048     }
    case kSSEI8x16UConvertI16x8: {
      // Narrows two vectors of 16-bit lanes (input 0 in dst, input 1) into one
      // vector of 16 bytes with unsigned saturation.
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      // packuswb reads its source words as signed, so a lane with the top bit
      // set would saturate to 0 instead of 0xFF. Clamp such lanes to 0x7FFF
      // first (unsigned min), which then packs to 0xFF.
      // Build 0x7FFF in every word: all-ones shifted right by one.
      __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
      __ psrlw(kScratchDoubleReg, 1);
      // Clamp input 0 (in dst).
      __ pminuw(dst, kScratchDoubleReg);
      // Clamp input 1 into the scratch register.
      __ pminuw(kScratchDoubleReg, i.InputOperand(1));
      // Low 8 bytes come from dst, high 8 bytes from the scratch register.
      __ packuswb(dst, kScratchDoubleReg);
      break;
    }
3061     case kAVXI8x16UConvertI16x8: {
3062       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3063       CpuFeatureScope avx_scope(tasm(), AVX);
3064       XMMRegister dst = i.OutputSimd128Register();
3065       // Change negative lanes to 0x7FFF
3066       __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3067       __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 1);
3068       __ vpminuw(dst, kScratchDoubleReg, i.InputSimd128Register(0));
3069       __ vpminuw(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1));
3070       __ vpackuswb(dst, dst, kScratchDoubleReg);
3071       break;
3072     }
3073     case kSSEI8x16AddSaturateU: {
3074       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3075       __ paddusb(i.OutputSimd128Register(), i.InputOperand(1));
3076       break;
3077     }
3078     case kAVXI8x16AddSaturateU: {
3079       CpuFeatureScope avx_scope(tasm(), AVX);
3080       __ vpaddusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3081                   i.InputOperand(1));
3082       break;
3083     }
3084     case kSSEI8x16SubSaturateU: {
3085       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3086       __ psubusb(i.OutputSimd128Register(), i.InputOperand(1));
3087       break;
3088     }
3089     case kAVXI8x16SubSaturateU: {
3090       CpuFeatureScope avx_scope(tasm(), AVX);
3091       __ vpsubusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3092                   i.InputOperand(1));
3093       break;
3094     }
    case kIA32I8x16ShrU: {
      // Logical right shift of each byte lane by an immediate. The bytes are
      // widened to words, shifted with the word shift, and narrowed again.
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      // Only the low three bits of the shift amount are used.
      int8_t shift = i.InputInt8(1) & 0x7;
      // Unpack the bytes into words, do logical shifts, and repack.
      // The unpack places each source byte in the upper half of a word; the
      // lower half is whatever the destination register held before.
      __ Punpckhbw(kScratchDoubleReg, src);
      __ Punpcklbw(dst, src);
      // Shifting by 8 + shift discards that lower half and leaves the
      // shifted byte zero-extended in the word.
      __ Psrlw(kScratchDoubleReg, 8 + shift);
      __ Psrlw(dst, 8 + shift);
      // Every word now fits in a byte, so the saturating pack is lossless.
      __ Packuswb(dst, kScratchDoubleReg);
      break;
    }
3107     case kSSEI8x16MinU: {
3108       XMMRegister dst = i.OutputSimd128Register();
3109       DCHECK_EQ(dst, i.InputSimd128Register(0));
3110       __ pminub(dst, i.InputOperand(1));
3111       break;
3112     }
3113     case kAVXI8x16MinU: {
3114       CpuFeatureScope avx_scope(tasm(), AVX);
3115       __ vpminub(i.OutputSimd128Register(), i.InputSimd128Register(0),
3116                  i.InputOperand(1));
3117       break;
3118     }
3119     case kSSEI8x16MaxU: {
3120       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3121       __ pmaxub(i.OutputSimd128Register(), i.InputOperand(1));
3122       break;
3123     }
3124     case kAVXI8x16MaxU: {
3125       CpuFeatureScope avx_scope(tasm(), AVX);
3126       __ vpmaxub(i.OutputSimd128Register(), i.InputSimd128Register(0),
3127                  i.InputOperand(1));
3128       break;
3129     }
    case kSSEI8x16GtU: {
      // Unsigned byte-wise "dst > src", computed as !(max(dst, src) == src).
      // The SSE form is destructive, so the output must alias input 0.
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      // dst = unsigned max(dst, src) per byte.
      __ pmaxub(dst, src);
      // The maximum equals src exactly in the lanes where dst was <= src.
      __ pcmpeqb(dst, src);
      // Invert the "<=" mask by XOR-ing with all-ones to get ">".
      __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
      __ pxor(dst, kScratchDoubleReg);
      break;
    }
3140     case kAVXI8x16GtU: {
3141       CpuFeatureScope avx_scope(tasm(), AVX);
3142       XMMRegister dst = i.OutputSimd128Register();
3143       XMMRegister src1 = i.InputSimd128Register(0);
3144       Operand src2 = i.InputOperand(1);
3145       __ vpmaxub(kScratchDoubleReg, src1, src2);
3146       __ vpcmpeqb(dst, kScratchDoubleReg, src2);
3147       __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3148       __ vpxor(dst, dst, kScratchDoubleReg);
3149       break;
3150     }
3151     case kSSEI8x16GeU: {
3152       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3153       XMMRegister dst = i.OutputSimd128Register();
3154       Operand src = i.InputOperand(1);
3155       __ pminub(dst, src);
3156       __ pcmpeqb(dst, src);
3157       break;
3158     }
3159     case kAVXI8x16GeU: {
3160       CpuFeatureScope avx_scope(tasm(), AVX);
3161       XMMRegister src1 = i.InputSimd128Register(0);
3162       Operand src2 = i.InputOperand(1);
3163       __ vpminub(kScratchDoubleReg, src1, src2);
3164       __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2);
3165       break;
3166     }
3167     case kIA32S128Zero: {
3168       XMMRegister dst = i.OutputSimd128Register();
3169       __ Pxor(dst, dst);
3170       break;
3171     }
    case kSSES128Not: {
      // Bitwise NOT of a 128-bit value, implemented as XOR with all-ones.
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(0);
      if (src.is_reg(dst)) {
        // Source and destination are the same register: build the all-ones
        // constant in the scratch register so the input is not destroyed.
        __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ pxor(dst, kScratchDoubleReg);
      } else {
        // Otherwise set dst itself to all-ones and XOR the source into it.
        __ pcmpeqd(dst, dst);
        __ pxor(dst, src);
      }
      break;
    }
3184     case kAVXS128Not: {
3185       CpuFeatureScope avx_scope(tasm(), AVX);
3186       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3187       __ vpxor(i.OutputSimd128Register(), kScratchDoubleReg, i.InputOperand(0));
3188       break;
3189     }
3190     case kSSES128And: {
3191       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3192       __ pand(i.OutputSimd128Register(), i.InputOperand(1));
3193       break;
3194     }
3195     case kAVXS128And: {
3196       CpuFeatureScope avx_scope(tasm(), AVX);
3197       __ vpand(i.OutputSimd128Register(), i.InputSimd128Register(0),
3198                i.InputOperand(1));
3199       break;
3200     }
3201     case kSSES128Or: {
3202       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3203       __ por(i.OutputSimd128Register(), i.InputOperand(1));
3204       break;
3205     }
3206     case kAVXS128Or: {
3207       CpuFeatureScope avx_scope(tasm(), AVX);
3208       __ vpor(i.OutputSimd128Register(), i.InputSimd128Register(0),
3209               i.InputOperand(1));
3210       break;
3211     }
3212     case kSSES128Xor: {
3213       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3214       __ pxor(i.OutputSimd128Register(), i.InputOperand(1));
3215       break;
3216     }
3217     case kAVXS128Xor: {
3218       CpuFeatureScope avx_scope(tasm(), AVX);
3219       __ vpxor(i.OutputSimd128Register(), i.InputSimd128Register(0),
3220                i.InputOperand(1));
3221       break;
3222     }
    case kSSES128Select: {
      // Bitwise select: for each bit, take input 1 where the mask (input 0)
      // is set and input 2 where it is clear. Computed as
      //   ((input1 ^ input2) & mask) ^ input2.
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      // Mask used here is stored in dst.
      XMMRegister dst = i.OutputSimd128Register();
      // scratch = input1 ^ input2
      __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
      __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
      // dst = mask & (input1 ^ input2)
      __ andps(dst, kScratchDoubleReg);
      // XOR-ing input2 back in yields input1 where the mask was set and
      // input2 elsewhere.
      __ xorps(dst, i.InputSimd128Register(2));
      break;
    }
3233     case kAVXS128Select: {
3234       CpuFeatureScope avx_scope(tasm(), AVX);
3235       XMMRegister dst = i.OutputSimd128Register();
3236       __ vxorps(kScratchDoubleReg, i.InputSimd128Register(2),
3237                 i.InputOperand(1));
3238       __ vandps(dst, kScratchDoubleReg, i.InputOperand(0));
3239       __ vxorps(dst, dst, i.InputSimd128Register(2));
3240       break;
3241     }
3242     case kIA32S8x16Shuffle: {
3243       XMMRegister dst = i.OutputSimd128Register();
3244       Operand src0 = i.InputOperand(0);
3245       Register tmp = i.TempRegister(0);
3246       // Prepare 16 byte aligned buffer for shuffle control mask
3247       __ mov(tmp, esp);
3248       __ and_(esp, -16);
3249       if (instr->InputCount() == 5) {  // only one input operand
3250         DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3251         for (int j = 4; j > 0; j--) {
3252           uint32_t mask = i.InputUint32(j);
3253           __ push(Immediate(mask));
3254         }
3255         __ Pshufb(dst, Operand(esp, 0));
3256       } else {  // two input operands
3257         DCHECK_EQ(6, instr->InputCount());
3258         __ movups(kScratchDoubleReg, src0);
3259         for (int j = 5; j > 1; j--) {
3260           uint32_t lanes = i.InputUint32(j);
3261           uint32_t mask = 0;
3262           for (int k = 0; k < 32; k += 8) {
3263             uint8_t lane = lanes >> k;
3264             mask |= (lane < kSimd128Size ? lane : 0x80) << k;
3265           }
3266           __ push(Immediate(mask));
3267         }
3268         __ Pshufb(kScratchDoubleReg, Operand(esp, 0));
3269         Operand src1 = i.InputOperand(1);
3270         if (!src1.is_reg(dst)) __ movups(dst, src1);
3271         for (int j = 5; j > 1; j--) {
3272           uint32_t lanes = i.InputUint32(j);
3273           uint32_t mask = 0;
3274           for (int k = 0; k < 32; k += 8) {
3275             uint8_t lane = lanes >> k;
3276             mask |= (lane >= kSimd128Size ? (lane & 0xF) : 0x80) << k;
3277           }
3278           __ push(Immediate(mask));
3279         }
3280         __ Pshufb(dst, Operand(esp, 0));
3281         __ por(dst, kScratchDoubleReg);
3282       }
3283       __ mov(esp, tmp);
3284       break;
3285     }
3286     case kIA32S32x4Swizzle: {
3287       DCHECK_EQ(2, instr->InputCount());
3288       __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(1));
3289       break;
3290     }
3291     case kIA32S32x4Shuffle: {
3292       DCHECK_EQ(4, instr->InputCount());  // Swizzles should be handled above.
3293       int8_t shuffle = i.InputInt8(2);
3294       DCHECK_NE(0xe4, shuffle);  // A simple blend should be handled below.
3295       __ Pshufd(kScratchDoubleReg, i.InputOperand(1), shuffle);
3296       __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), shuffle);
3297       __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
3298       break;
3299     }
3300     case kIA32S16x8Blend:
3301       ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
3302       break;
3303     case kIA32S16x8HalfShuffle1: {
3304       XMMRegister dst = i.OutputSimd128Register();
3305       __ Pshuflw(dst, i.InputOperand(0), i.InputInt8(1));
3306       __ Pshufhw(dst, dst, i.InputInt8(2));
3307       break;
3308     }
3309     case kIA32S16x8HalfShuffle2: {
3310       XMMRegister dst = i.OutputSimd128Register();
3311       __ Pshuflw(kScratchDoubleReg, i.InputOperand(1), i.InputInt8(2));
3312       __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
3313       __ Pshuflw(dst, i.InputOperand(0), i.InputInt8(2));
3314       __ Pshufhw(dst, dst, i.InputInt8(3));
3315       __ Pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
3316       break;
3317     }
3318     case kIA32S8x16Alignr:
3319       ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
3320       break;
    case kIA32S16x8Dup: {
      // Broadcasts one 16-bit lane of the input to all eight lanes.
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(0);
      int8_t lane = i.InputInt8(1) & 0x7;
      // Index of the lane within its 64-bit half.
      int8_t lane4 = lane & 0x3;
      // Shuffle immediate that selects lane4 for all four word positions.
      int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
      if (lane < 4) {
        // Replicate the lane across the low four words, then broadcast the
        // lowest dword (0x00 selects dword 0 for every position).
        __ Pshuflw(dst, src, half_dup);
        __ Pshufd(dst, dst, 0);
      } else {
        // Replicate the lane across the high four words, then broadcast
        // dword 2 (0xaa selects dword 2 for every position).
        __ Pshufhw(dst, src, half_dup);
        __ Pshufd(dst, dst, 0xaa);
      }
      break;
    }
    case kIA32S8x16Dup: {
      // Broadcasts one byte lane of the input to all sixteen lanes. The byte
      // is first doubled into a word, then the word is broadcast.
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      int8_t lane = i.InputInt8(1) & 0xf;
      // Interleave the register with itself so every byte of the chosen half
      // becomes a word holding two copies of that byte.
      if (CpuFeatures::IsSupported(AVX)) {
        CpuFeatureScope avx_scope(tasm(), AVX);
        if (lane < 8) {
          __ vpunpcklbw(dst, src, src);
        } else {
          __ vpunpckhbw(dst, src, src);
        }
      } else {
        // The SSE form is destructive, so dst must already hold the input.
        DCHECK_EQ(dst, src);
        if (lane < 8) {
          __ punpcklbw(dst, dst);
        } else {
          __ punpckhbw(dst, dst);
        }
      }
      // After unpacking, byte lane (lane & 7) of the chosen half sits in word
      // lane (lane & 7).
      lane &= 0x7;
      // Index of the word within its 64-bit half.
      int8_t lane4 = lane & 0x3;
      // Shuffle immediate that selects lane4 for all four word positions.
      int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
      if (lane < 4) {
        // Replicate across the low four words, then broadcast dword 0.
        __ Pshuflw(dst, dst, half_dup);
        __ Pshufd(dst, dst, 0);
      } else {
        // Replicate across the high four words, then broadcast dword 2.
        __ Pshufhw(dst, dst, half_dup);
        __ Pshufd(dst, dst, 0xaa);
      }
      break;
    }
3367     case kIA32S64x2UnpackHigh:
3368       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
3369       break;
3370     case kIA32S32x4UnpackHigh:
3371       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
3372       break;
3373     case kIA32S16x8UnpackHigh:
3374       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
3375       break;
3376     case kIA32S8x16UnpackHigh:
3377       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
3378       break;
3379     case kIA32S64x2UnpackLow:
3380       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
3381       break;
3382     case kIA32S32x4UnpackLow:
3383       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
3384       break;
3385     case kIA32S16x8UnpackLow:
3386       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
3387       break;
3388     case kIA32S8x16UnpackLow:
3389       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
3390       break;
3391     case kSSES16x8UnzipHigh: {
3392       CpuFeatureScope sse_scope(tasm(), SSE4_1);
3393       XMMRegister dst = i.OutputSimd128Register();
3394       XMMRegister src2 = dst;
3395       DCHECK_EQ(dst, i.InputSimd128Register(0));
3396       if (instr->InputCount() == 2) {
3397         __ movups(kScratchDoubleReg, i.InputOperand(1));
3398         __ psrld(kScratchDoubleReg, 16);
3399         src2 = kScratchDoubleReg;
3400       }
3401       __ psrld(dst, 16);
3402       __ packusdw(dst, src2);
3403       break;
3404     }
3405     case kAVXS16x8UnzipHigh: {
3406       CpuFeatureScope avx_scope(tasm(), AVX);
3407       XMMRegister dst = i.OutputSimd128Register();
3408       XMMRegister src2 = dst;
3409       if (instr->InputCount() == 2) {
3410         __ vpsrld(kScratchDoubleReg, i.InputSimd128Register(1), 16);
3411         src2 = kScratchDoubleReg;
3412       }
3413       __ vpsrld(dst, i.InputSimd128Register(0), 16);
3414       __ vpackusdw(dst, dst, src2);
3415       break;
3416     }
    case kSSES16x8UnzipLow: {
      // Collects the even-numbered 16-bit lanes of the input(s): the odd words
      // are zeroed so each dword holds one zero-extended even word, then the
      // dwords are packed back down to words.
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      // With a single input, both halves of the result come from dst.
      XMMRegister src2 = dst;
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      __ pxor(kScratchDoubleReg, kScratchDoubleReg);
      if (instr->InputCount() == 2) {
        // Copy only the even words (mask 0x55) of input 1 into the zeroed
        // scratch register; its odd words stay zero.
        __ pblendw(kScratchDoubleReg, i.InputOperand(1), 0x55);
        src2 = kScratchDoubleReg;
      }
      // Overwrite the odd words (mask 0xaa) of dst with the scratch register's
      // odd words, which are zero in both the one- and two-input case.
      __ pblendw(dst, kScratchDoubleReg, 0xaa);
      // Every dword is at most 0xFFFF, so the saturating pack is lossless.
      __ packusdw(dst, src2);
      break;
    }
3431     case kAVXS16x8UnzipLow: {
3432       CpuFeatureScope avx_scope(tasm(), AVX);
3433       XMMRegister dst = i.OutputSimd128Register();
3434       XMMRegister src2 = dst;
3435       __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3436       if (instr->InputCount() == 2) {
3437         __ vpblendw(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1),
3438                     0x55);
3439         src2 = kScratchDoubleReg;
3440       }
3441       __ vpblendw(dst, kScratchDoubleReg, i.InputSimd128Register(0), 0x55);
3442       __ vpackusdw(dst, dst, src2);
3443       break;
3444     }
3445     case kSSES8x16UnzipHigh: {
3446       XMMRegister dst = i.OutputSimd128Register();
3447       XMMRegister src2 = dst;
3448       DCHECK_EQ(dst, i.InputSimd128Register(0));
3449       if (instr->InputCount() == 2) {
3450         __ movups(kScratchDoubleReg, i.InputOperand(1));
3451         __ psrlw(kScratchDoubleReg, 8);
3452         src2 = kScratchDoubleReg;
3453       }
3454       __ psrlw(dst, 8);
3455       __ packuswb(dst, src2);
3456       break;
3457     }
3458     case kAVXS8x16UnzipHigh: {
3459       CpuFeatureScope avx_scope(tasm(), AVX);
3460       XMMRegister dst = i.OutputSimd128Register();
3461       XMMRegister src2 = dst;
3462       if (instr->InputCount() == 2) {
3463         __ vpsrlw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3464         src2 = kScratchDoubleReg;
3465       }
3466       __ vpsrlw(dst, i.InputSimd128Register(0), 8);
3467       __ vpackuswb(dst, dst, src2);
3468       break;
3469     }
3470     case kSSES8x16UnzipLow: {
3471       XMMRegister dst = i.OutputSimd128Register();
3472       XMMRegister src2 = dst;
3473       DCHECK_EQ(dst, i.InputSimd128Register(0));
3474       if (instr->InputCount() == 2) {
3475         __ movups(kScratchDoubleReg, i.InputOperand(1));
3476         __ psllw(kScratchDoubleReg, 8);
3477         __ psrlw(kScratchDoubleReg, 8);
3478         src2 = kScratchDoubleReg;
3479       }
3480       __ psllw(dst, 8);
3481       __ psrlw(dst, 8);
3482       __ packuswb(dst, src2);
3483       break;
3484     }
3485     case kAVXS8x16UnzipLow: {
3486       CpuFeatureScope avx_scope(tasm(), AVX);
3487       XMMRegister dst = i.OutputSimd128Register();
3488       XMMRegister src2 = dst;
3489       if (instr->InputCount() == 2) {
3490         __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3491         __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 8);
3492         src2 = kScratchDoubleReg;
3493       }
3494       __ vpsllw(dst, i.InputSimd128Register(0), 8);
3495       __ vpsrlw(dst, dst, 8);
3496       __ vpackuswb(dst, dst, src2);
3497       break;
3498     }
3499     case kSSES8x16TransposeLow: {
3500       XMMRegister dst = i.OutputSimd128Register();
3501       DCHECK_EQ(dst, i.InputSimd128Register(0));
3502       __ psllw(dst, 8);
3503       if (instr->InputCount() == 1) {
3504         __ movups(kScratchDoubleReg, dst);
3505       } else {
3506         DCHECK_EQ(2, instr->InputCount());
3507         __ movups(kScratchDoubleReg, i.InputOperand(1));
3508         __ psllw(kScratchDoubleReg, 8);
3509       }
3510       __ psrlw(dst, 8);
3511       __ por(dst, kScratchDoubleReg);
3512       break;
3513     }
3514     case kAVXS8x16TransposeLow: {
3515       CpuFeatureScope avx_scope(tasm(), AVX);
3516       XMMRegister dst = i.OutputSimd128Register();
3517       if (instr->InputCount() == 1) {
3518         __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(0), 8);
3519         __ vpsrlw(dst, kScratchDoubleReg, 8);
3520       } else {
3521         DCHECK_EQ(2, instr->InputCount());
3522         __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3523         __ vpsllw(dst, i.InputSimd128Register(0), 8);
3524         __ vpsrlw(dst, dst, 8);
3525       }
3526       __ vpor(dst, dst, kScratchDoubleReg);
3527       break;
3528     }
3529     case kSSES8x16TransposeHigh: {
3530       XMMRegister dst = i.OutputSimd128Register();
3531       DCHECK_EQ(dst, i.InputSimd128Register(0));
3532       __ psrlw(dst, 8);
3533       if (instr->InputCount() == 1) {
3534         __ movups(kScratchDoubleReg, dst);
3535       } else {
3536         DCHECK_EQ(2, instr->InputCount());
3537         __ movups(kScratchDoubleReg, i.InputOperand(1));
3538         __ psrlw(kScratchDoubleReg, 8);
3539       }
3540       __ psllw(kScratchDoubleReg, 8);
3541       __ por(dst, kScratchDoubleReg);
3542       break;
3543     }
3544     case kAVXS8x16TransposeHigh: {
3545       CpuFeatureScope avx_scope(tasm(), AVX);
3546       XMMRegister dst = i.OutputSimd128Register();
3547       if (instr->InputCount() == 1) {
3548         __ vpsrlw(dst, i.InputSimd128Register(0), 8);
3549         __ vpsllw(kScratchDoubleReg, dst, 8);
3550       } else {
3551         DCHECK_EQ(2, instr->InputCount());
3552         __ vpsrlw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3553         __ vpsrlw(dst, i.InputSimd128Register(0), 8);
3554         __ vpsllw(kScratchDoubleReg, kScratchDoubleReg, 8);
3555       }
3556       __ vpor(dst, dst, kScratchDoubleReg);
3557       break;
3558     }
    // Reverses the bytes inside each group of 8, 4 or 2 bytes. Words are
    // first moved into reversed order within the group (not needed for
    // 2-byte groups), then the two bytes of every word are swapped.
    case kSSES8x8Reverse:
    case kSSES8x4Reverse:
    case kSSES8x2Reverse: {
      DCHECK_EQ(1, instr->InputCount());
      XMMRegister dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      if (arch_opcode != kSSES8x2Reverse) {
        // First shuffle words into position.
        // 0xB1 swaps adjacent word pairs (4-byte groups); 0x1B reverses all
        // four words of a 64-bit half (8-byte groups).
        int8_t shuffle_mask = arch_opcode == kSSES8x4Reverse ? 0xB1 : 0x1B;
        __ pshuflw(dst, dst, shuffle_mask);
        __ pshufhw(dst, dst, shuffle_mask);
      }
      // Swap the two bytes of each word: (word >> 8) | (word << 8).
      __ movaps(kScratchDoubleReg, dst);
      __ psrlw(kScratchDoubleReg, 8);
      __ psllw(dst, 8);
      __ por(dst, kScratchDoubleReg);
      break;
    }
3577     case kAVXS8x2Reverse:
3578     case kAVXS8x4Reverse:
3579     case kAVXS8x8Reverse: {
3580       DCHECK_EQ(1, instr->InputCount());
3581       CpuFeatureScope avx_scope(tasm(), AVX);
3582       XMMRegister dst = i.OutputSimd128Register();
3583       XMMRegister src = dst;
3584       if (arch_opcode != kAVXS8x2Reverse) {
3585         // First shuffle words into position.
3586         int8_t shuffle_mask = arch_opcode == kAVXS8x4Reverse ? 0xB1 : 0x1B;
3587         __ vpshuflw(dst, i.InputOperand(0), shuffle_mask);
3588         __ vpshufhw(dst, dst, shuffle_mask);
3589       } else {
3590         src = i.InputSimd128Register(0);
3591       }
3592       // Reverse each 16 bit lane.
3593       __ vpsrlw(kScratchDoubleReg, src, 8);
3594       __ vpsllw(dst, src, 8);
3595       __ vpor(dst, dst, kScratchDoubleReg);
3596       break;
3597     }
    // Produces -1 in the output register if any bit of the SIMD input is set,
    // and 0 otherwise. The lane width does not matter for this test, so all
    // three opcodes share one implementation.
    case kIA32S1x4AnyTrue:
    case kIA32S1x8AnyTrue:
    case kIA32S1x16AnyTrue: {
      Register dst = i.OutputRegister();
      XMMRegister src = i.InputSimd128Register(0);
      Register tmp = i.TempRegister(0);
      // tmp = 0 is the value moved in when nothing is set.
      __ xor_(tmp, tmp);
      // Assume "true" until proven otherwise.
      __ mov(dst, Immediate(-1));
      // ptest sets ZF when (src & src) is all zero.
      __ Ptest(src, src);
      // If the input was all zero, replace the result with 0.
      __ cmov(zero, dst, tmp);
      break;
    }
    // Produces -1 in the output register if every lane of the SIMD input is
    // non-zero, and 0 otherwise. The opcode selects the lane width (32, 16 or
    // 8 bits) used for the per-lane comparison.
    case kIA32S1x4AllTrue:
    case kIA32S1x8AllTrue:
    case kIA32S1x16AllTrue: {
      Register dst = i.OutputRegister();
      Operand src = i.InputOperand(0);
      Register tmp = i.TempRegister(0);
      // tmp = -1 is the value moved in when all lanes are true.
      __ mov(tmp, Immediate(-1));
      // Assume "false" until proven otherwise.
      __ xor_(dst, dst);
      // Compare all src lanes to false.
      // A lane of the scratch register becomes all-ones exactly where the
      // corresponding src lane is zero.
      __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
      if (arch_opcode == kIA32S1x4AllTrue) {
        __ Pcmpeqd(kScratchDoubleReg, src);
      } else if (arch_opcode == kIA32S1x8AllTrue) {
        __ Pcmpeqw(kScratchDoubleReg, src);
      } else {
        __ Pcmpeqb(kScratchDoubleReg, src);
      }
      // If kScratchDoubleReg is all zero, none of src lanes are false.
      // ptest sets ZF in that case, and the cmov then writes -1 to dst.
      __ Ptest(kScratchDoubleReg, kScratchDoubleReg);
      __ cmov(zero, dst, tmp);
      break;
    }
3632     case kIA32StackCheck: {
3633       ExternalReference const stack_limit =
3634           ExternalReference::address_of_stack_limit(__ isolate());
3635       __ VerifyRootRegister();
3636       __ cmp(esp, tasm()->StaticVariable(stack_limit));
3637       break;
3638     }
    case kIA32Word32AtomicPairLoad: {
      // Loads a 64-bit value as a register pair. The value is read with one
      // 64-bit move into an XMM temp and then split into two 32-bit halves.
      // NOTE(review): atomicity presumably relies on the memory operand being
      // 8-byte aligned — confirm against the callers.
      XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
      __ movq(tmp, i.MemoryOperand());
      // Dword 0 of the temp goes to output 0, dword 1 to output 1.
      __ Pextrd(i.OutputRegister(0), tmp, 0);
      __ Pextrd(i.OutputRegister(1), tmp, 1);
      break;
    }
3646     case kIA32Word32AtomicPairStore: {
3647       __ mov(i.TempRegister(0), i.MemoryOperand(2));
3648       __ mov(i.TempRegister(1), i.NextMemoryOperand(2));
3649       __ lock();
3650       __ cmpxchg8b(i.MemoryOperand(2));
3651       break;
3652     }
3653     case kWord32AtomicExchangeInt8: {
3654       __ xchg_b(i.InputRegister(0), i.MemoryOperand(1));
3655       __ movsx_b(i.InputRegister(0), i.InputRegister(0));
3656       break;
3657     }
3658     case kWord32AtomicExchangeUint8: {
3659       __ xchg_b(i.InputRegister(0), i.MemoryOperand(1));
3660       __ movzx_b(i.InputRegister(0), i.InputRegister(0));
3661       break;
3662     }
3663     case kWord32AtomicExchangeInt16: {
3664       __ xchg_w(i.InputRegister(0), i.MemoryOperand(1));
3665       __ movsx_w(i.InputRegister(0), i.InputRegister(0));
3666       break;
3667     }
3668     case kWord32AtomicExchangeUint16: {
3669       __ xchg_w(i.InputRegister(0), i.MemoryOperand(1));
3670       __ movzx_w(i.InputRegister(0), i.InputRegister(0));
3671       break;
3672     }
3673     case kWord32AtomicExchangeWord32: {
3674       __ xchg(i.InputRegister(0), i.MemoryOperand(1));
3675       break;
3676     }
3677     // For the narrow Word64 operations below, i.OutputRegister(1) contains
3678     // the high-order 32 bits for the 64bit operation. As the data exchange
3679     // fits in one register, the i.OutputRegister(1) needs to be cleared for
3680     // the correct return value to be propagated back.
3681     case kIA32Word64AtomicNarrowExchangeUint8: {
3682       __ xchg_b(i.OutputRegister(0), i.MemoryOperand(1));
3683       __ movzx_b(i.OutputRegister(0), i.OutputRegister(0));
3684       __ xor_(i.OutputRegister(1), i.OutputRegister(1));
3685       break;
3686     }
3687     case kIA32Word64AtomicNarrowExchangeUint16: {
3688       __ xchg_w(i.OutputRegister(0), i.MemoryOperand(1));
3689       __ movzx_w(i.OutputRegister(0), i.OutputRegister(0));
3690       __ xor_(i.OutputRegister(1), i.OutputRegister(1));
3691       break;
3692     }
3693     case kIA32Word64AtomicNarrowExchangeUint32: {
3694       __ xchg(i.OutputRegister(0), i.MemoryOperand(1));
3695       __ xor_(i.OutputRegister(1), i.OutputRegister(1));
3696       break;
3697     }
3698     case kIA32Word32AtomicPairExchange: {
3699       __ mov(i.OutputRegister(0), i.MemoryOperand(2));
3700       __ mov(i.OutputRegister(1), i.NextMemoryOperand(2));
3701       __ lock();
3702       __ cmpxchg8b(i.MemoryOperand(2));
3703       break;
3704     }
3705     case kWord32AtomicCompareExchangeInt8: {
3706       __ lock();
3707       __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1));
3708       __ movsx_b(eax, eax);
3709       break;
3710     }
3711     case kWord32AtomicCompareExchangeUint8: {
3712       __ lock();
3713       __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1));
3714       __ movzx_b(eax, eax);
3715       break;
3716     }
3717     case kWord32AtomicCompareExchangeInt16: {
3718       __ lock();
3719       __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1));
3720       __ movsx_w(eax, eax);
3721       break;
3722     }
3723     case kWord32AtomicCompareExchangeUint16: {
3724       __ lock();
3725       __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1));
3726       __ movzx_w(eax, eax);
3727       break;
3728     }
3729     case kWord32AtomicCompareExchangeWord32: {
3730       __ lock();
3731       __ cmpxchg(i.MemoryOperand(2), i.InputRegister(1));
3732       break;
3733     }
3734     case kIA32Word64AtomicNarrowCompareExchangeUint8: {
3735       __ lock();
3736       __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1));
3737       __ movzx_b(i.OutputRegister(0), i.OutputRegister(0));
3738       __ xor_(i.OutputRegister(1), i.OutputRegister(1));
3739       break;
3740     }
3741     case kIA32Word64AtomicNarrowCompareExchangeUint16: {
3742       __ lock();
3743       __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1));
3744       __ movzx_w(i.OutputRegister(0), i.OutputRegister(0));
3745       __ xor_(i.OutputRegister(1), i.OutputRegister(1));
3746       break;
3747     }
3748     case kIA32Word64AtomicNarrowCompareExchangeUint32: {
3749       __ lock();
3750       __ cmpxchg(i.MemoryOperand(2), i.InputRegister(1));
3751       __ xor_(i.OutputRegister(1), i.OutputRegister(1));
3752       break;
3753     }
3754     case kIA32Word32AtomicPairCompareExchange: {
3755       __ lock();
3756       __ cmpxchg8b(i.MemoryOperand(4));
3757       break;
3758     }
3759 #define ATOMIC_BINOP_CASE(op, inst)                       \
3760   case kWord32Atomic##op##Int8: {                         \
3761     ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b);        \
3762     __ movsx_b(eax, eax);                                 \
3763     break;                                                \
3764   }                                                       \
3765   case kIA32Word64AtomicNarrow##op##Uint8: {              \
3766     ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b);        \
3767     __ movzx_b(i.OutputRegister(0), i.OutputRegister(0)); \
3768     __ xor_(i.OutputRegister(1), i.OutputRegister(1));    \
3769     break;                                                \
3770   }                                                       \
3771   case kWord32Atomic##op##Uint8: {                        \
3772     ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b);        \
3773     __ movzx_b(eax, eax);                                 \
3774     break;                                                \
3775   }                                                       \
3776   case kWord32Atomic##op##Int16: {                        \
3777     ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w);        \
3778     __ movsx_w(eax, eax);                                 \
3779     break;                                                \
3780   }                                                       \
3781   case kIA32Word64AtomicNarrow##op##Uint16: {             \
3782     ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w);        \
3783     __ movzx_w(i.OutputRegister(0), i.OutputRegister(0)); \
3784     __ xor_(i.OutputRegister(1), i.OutputRegister(1));    \
3785     break;                                                \
3786   }                                                       \
3787   case kWord32Atomic##op##Uint16: {                       \
3788     ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w);        \
3789     __ movzx_w(eax, eax);                                 \
3790     break;                                                \
3791   }                                                       \
3792   case kIA32Word64AtomicNarrow##op##Uint32: {             \
3793     ASSEMBLE_ATOMIC_BINOP(inst, mov, cmpxchg);            \
3794     __ xor_(i.OutputRegister(1), i.OutputRegister(1));    \
3795     break;                                                \
3796   }                                                       \
3797   case kWord32Atomic##op##Word32: {                       \
3798     ASSEMBLE_ATOMIC_BINOP(inst, mov, cmpxchg);            \
3799     break;                                                \
3800   }
3801       ATOMIC_BINOP_CASE(Add, add)
3802       ATOMIC_BINOP_CASE(Sub, sub)
3803       ATOMIC_BINOP_CASE(And, and_)
3804       ATOMIC_BINOP_CASE(Or, or_)
3805       ATOMIC_BINOP_CASE(Xor, xor_)
3806 #undef ATOMIC_BINOP_CASE
3807 #define ATOMIC_BINOP_CASE(op, instr1, instr2) \
3808   case kIA32Word32AtomicPair##op: {           \
3809     ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2)  \
3810     break;                                    \
3811   }
3812       ATOMIC_BINOP_CASE(Add, add, adc)
3813       ATOMIC_BINOP_CASE(And, and_, and_)
3814       ATOMIC_BINOP_CASE(Or, or_, or_)
3815       ATOMIC_BINOP_CASE(Xor, xor_, xor_)
3816 #undef ATOMIC_BINOP_CASE
3817     case kIA32Word32AtomicPairSub: {
3818       Label binop;
3819       __ bind(&binop);
3820       // Move memory operand into edx:eax
3821       __ mov(i.OutputRegister(0), i.MemoryOperand(2));
3822       __ mov(i.OutputRegister(1), i.NextMemoryOperand(2));
3823       // Save input registers temporarily on the stack.
3824       __ push(i.InputRegister(0));
3825       __ push(i.InputRegister(1));
3826       // Negate input in place
3827       __ neg(i.InputRegister(0));
3828       __ adc(i.InputRegister(1), 0);
3829       __ neg(i.InputRegister(1));
3830       // Add memory operand, negated input.
3831       __ add(i.InputRegister(0), i.OutputRegister(0));
3832       __ adc(i.InputRegister(1), i.OutputRegister(1));
3833       __ lock();
3834       __ cmpxchg8b(i.MemoryOperand(2));
3835       // Restore input registers
3836       __ pop(i.InputRegister(1));
3837       __ pop(i.InputRegister(0));
3838       __ j(not_equal, &binop);
3839       break;
3840     }
3841     case kWord32AtomicLoadInt8:
3842     case kWord32AtomicLoadUint8:
3843     case kWord32AtomicLoadInt16:
3844     case kWord32AtomicLoadUint16:
3845     case kWord32AtomicLoadWord32:
3846     case kWord32AtomicStoreWord8:
3847     case kWord32AtomicStoreWord16:
3848     case kWord32AtomicStoreWord32:
3849       UNREACHABLE();  // Won't be generated by instruction selector.
3850       break;
3851   }
3852   return kSuccess;
3853 }  // NOLINT(readability/fn_size)
3854 
FlagsConditionToCondition(FlagsCondition condition)3855 static Condition FlagsConditionToCondition(FlagsCondition condition) {
3856   switch (condition) {
3857     case kUnorderedEqual:
3858     case kEqual:
3859       return equal;
3860       break;
3861     case kUnorderedNotEqual:
3862     case kNotEqual:
3863       return not_equal;
3864       break;
3865     case kSignedLessThan:
3866       return less;
3867       break;
3868     case kSignedGreaterThanOrEqual:
3869       return greater_equal;
3870       break;
3871     case kSignedLessThanOrEqual:
3872       return less_equal;
3873       break;
3874     case kSignedGreaterThan:
3875       return greater;
3876       break;
3877     case kUnsignedLessThan:
3878       return below;
3879       break;
3880     case kUnsignedGreaterThanOrEqual:
3881       return above_equal;
3882       break;
3883     case kUnsignedLessThanOrEqual:
3884       return below_equal;
3885       break;
3886     case kUnsignedGreaterThan:
3887       return above;
3888       break;
3889     case kOverflow:
3890       return overflow;
3891       break;
3892     case kNotOverflow:
3893       return no_overflow;
3894       break;
3895     default:
3896       UNREACHABLE();
3897       break;
3898   }
3899 }
3900 
3901 // Assembles a branch after an instruction.
AssembleArchBranch(Instruction * instr,BranchInfo * branch)3902 void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
3903   Label::Distance flabel_distance =
3904       branch->fallthru ? Label::kNear : Label::kFar;
3905   Label* tlabel = branch->true_label;
3906   Label* flabel = branch->false_label;
3907   if (branch->condition == kUnorderedEqual) {
3908     __ j(parity_even, flabel, flabel_distance);
3909   } else if (branch->condition == kUnorderedNotEqual) {
3910     __ j(parity_even, tlabel);
3911   }
3912   __ j(FlagsConditionToCondition(branch->condition), tlabel);
3913 
3914   // Add a jump if not falling through to the next block.
3915   if (!branch->fallthru) __ jmp(flabel);
3916 }
3917 
AssembleBranchPoisoning(FlagsCondition condition,Instruction * instr)3918 void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
3919                                             Instruction* instr) {
3920   // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
3921   if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
3922     return;
3923   }
3924 
3925   condition = NegateFlagsCondition(condition);
3926   __ setcc(FlagsConditionToCondition(condition), kSpeculationPoisonRegister);
3927   __ add(kSpeculationPoisonRegister, Immediate(255));
3928   __ sar(kSpeculationPoisonRegister, 31u);
3929 }
3930 
AssembleArchDeoptBranch(Instruction * instr,BranchInfo * branch)3931 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
3932                                             BranchInfo* branch) {
3933   AssembleArchBranch(instr, branch);
3934 }
3935 
AssembleArchJump(RpoNumber target)3936 void CodeGenerator::AssembleArchJump(RpoNumber target) {
3937   if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
3938 }
3939 
AssembleArchTrap(Instruction * instr,FlagsCondition condition)3940 void CodeGenerator::AssembleArchTrap(Instruction* instr,
3941                                      FlagsCondition condition) {
3942   class OutOfLineTrap final : public OutOfLineCode {
3943    public:
3944     OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
3945         : OutOfLineCode(gen), instr_(instr), gen_(gen) {}
3946 
3947     void Generate() final {
3948       IA32OperandConverter i(gen_, instr_);
3949       TrapId trap_id =
3950           static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
3951       GenerateCallToTrap(trap_id);
3952     }
3953 
3954    private:
3955     void GenerateCallToTrap(TrapId trap_id) {
3956       if (trap_id == TrapId::kInvalid) {
3957         // We cannot test calls to the runtime in cctest/test-run-wasm.
3958         // Therefore we emit a call to C here instead of a call to the runtime.
3959         __ PrepareCallCFunction(0, esi);
3960         __ CallCFunction(
3961             ExternalReference::wasm_call_trap_callback_for_testing(), 0);
3962         __ LeaveFrame(StackFrame::WASM_COMPILED);
3963         auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
3964         size_t pop_size = call_descriptor->StackParameterCount() * kPointerSize;
3965         // Use ecx as a scratch register, we return anyways immediately.
3966         __ Ret(static_cast<int>(pop_size), ecx);
3967       } else {
3968         gen_->AssembleSourcePosition(instr_);
3969         // A direct call to a wasm runtime stub defined in this module.
3970         // Just encode the stub index. This will be patched at relocation.
3971         __ wasm_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
3972         ReferenceMap* reference_map =
3973             new (gen_->zone()) ReferenceMap(gen_->zone());
3974         gen_->RecordSafepoint(reference_map, Safepoint::kSimple, 0,
3975                               Safepoint::kNoLazyDeopt);
3976         __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
3977       }
3978     }
3979 
3980     Instruction* instr_;
3981     CodeGenerator* gen_;
3982   };
3983   auto ool = new (zone()) OutOfLineTrap(this, instr);
3984   Label* tlabel = ool->entry();
3985   Label end;
3986   if (condition == kUnorderedEqual) {
3987     __ j(parity_even, &end);
3988   } else if (condition == kUnorderedNotEqual) {
3989     __ j(parity_even, tlabel);
3990   }
3991   __ j(FlagsConditionToCondition(condition), tlabel);
3992   __ bind(&end);
3993 }
3994 
3995 // Assembles boolean materializations after an instruction.
AssembleArchBoolean(Instruction * instr,FlagsCondition condition)3996 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
3997                                         FlagsCondition condition) {
3998   IA32OperandConverter i(this, instr);
3999   Label done;
4000 
4001   // Materialize a full 32-bit 1 or 0 value. The result register is always the
4002   // last output of the instruction.
4003   Label check;
4004   DCHECK_NE(0u, instr->OutputCount());
4005   Register reg = i.OutputRegister(instr->OutputCount() - 1);
4006   if (condition == kUnorderedEqual) {
4007     __ j(parity_odd, &check, Label::kNear);
4008     __ Move(reg, Immediate(0));
4009     __ jmp(&done, Label::kNear);
4010   } else if (condition == kUnorderedNotEqual) {
4011     __ j(parity_odd, &check, Label::kNear);
4012     __ mov(reg, Immediate(1));
4013     __ jmp(&done, Label::kNear);
4014   }
4015   Condition cc = FlagsConditionToCondition(condition);
4016 
4017   __ bind(&check);
4018   if (reg.is_byte_register()) {
4019     // setcc for byte registers (al, bl, cl, dl).
4020     __ setcc(cc, reg);
4021     __ movzx_b(reg, reg);
4022   } else {
4023     // Emit a branch to set a register to either 1 or 0.
4024     Label set;
4025     __ j(cc, &set, Label::kNear);
4026     __ Move(reg, Immediate(0));
4027     __ jmp(&done, Label::kNear);
4028     __ bind(&set);
4029     __ mov(reg, Immediate(1));
4030   }
4031   __ bind(&done);
4032 }
4033 
AssembleArchBinarySearchSwitch(Instruction * instr)4034 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
4035   IA32OperandConverter i(this, instr);
4036   Register input = i.InputRegister(0);
4037   std::vector<std::pair<int32_t, Label*>> cases;
4038   for (size_t index = 2; index < instr->InputCount(); index += 2) {
4039     cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
4040   }
4041   AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
4042                                       cases.data() + cases.size());
4043 }
4044 
AssembleArchLookupSwitch(Instruction * instr)4045 void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
4046   IA32OperandConverter i(this, instr);
4047   Register input = i.InputRegister(0);
4048   for (size_t index = 2; index < instr->InputCount(); index += 2) {
4049     __ cmp(input, Immediate(i.InputInt32(index + 0)));
4050     __ j(equal, GetLabel(i.InputRpo(index + 1)));
4051   }
4052   AssembleArchJump(i.InputRpo(1));
4053 }
4054 
4055 
AssembleArchTableSwitch(Instruction * instr)4056 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
4057   IA32OperandConverter i(this, instr);
4058   Register input = i.InputRegister(0);
4059   size_t const case_count = instr->InputCount() - 2;
4060   Label** cases = zone()->NewArray<Label*>(case_count);
4061   for (size_t index = 0; index < case_count; ++index) {
4062     cases[index] = GetLabel(i.InputRpo(index + 2));
4063   }
4064   Label* const table = AddJumpTable(cases, case_count);
4065   __ cmp(input, Immediate(case_count));
4066   __ j(above_equal, GetLabel(i.InputRpo(1)));
4067   __ jmp(Operand::JumpTable(input, times_4, table));
4068 }
4069 
4070 
4071 // The calling convention for JSFunctions on IA32 passes arguments on the
4072 // stack and the JSFunction and context in EDI and ESI, respectively, thus
4073 // the steps of the call look as follows:
4074 
4075 // --{ before the call instruction }--------------------------------------------
4076 //                                                         |  caller frame |
4077 //                                                         ^ esp           ^ ebp
4078 
4079 // --{ push arguments and setup ESI, EDI }--------------------------------------
4080 //                                       | args + receiver |  caller frame |
4081 //                                       ^ esp                             ^ ebp
4082 //                 [edi = JSFunction, esi = context]
4083 
4084 // --{ call [edi + kCodeEntryOffset] }------------------------------------------
4085 //                                 | RET | args + receiver |  caller frame |
4086 //                                 ^ esp                                   ^ ebp
4087 
4088 // =={ prologue of called function }============================================
4089 // --{ push ebp }---------------------------------------------------------------
4090 //                            | FP | RET | args + receiver |  caller frame |
4091 //                            ^ esp                                        ^ ebp
4092 
4093 // --{ mov ebp, esp }-----------------------------------------------------------
4094 //                            | FP | RET | args + receiver |  caller frame |
4095 //                            ^ ebp,esp
4096 
4097 // --{ push esi }---------------------------------------------------------------
4098 //                      | CTX | FP | RET | args + receiver |  caller frame |
4099 //                      ^esp  ^ ebp
4100 
4101 // --{ push edi }---------------------------------------------------------------
4102 //                | FNC | CTX | FP | RET | args + receiver |  caller frame |
4103 //                ^esp        ^ ebp
4104 
4105 // --{ subi esp, #N }-----------------------------------------------------------
4106 // | callee frame | FNC | CTX | FP | RET | args + receiver |  caller frame |
4107 // ^esp                       ^ ebp
4108 
4109 // =={ body of called function }================================================
4110 
4111 // =={ epilogue of called function }============================================
4112 // --{ mov esp, ebp }-----------------------------------------------------------
4113 //                            | FP | RET | args + receiver |  caller frame |
4114 //                            ^ esp,ebp
4115 
4116 // --{ pop ebp }-----------------------------------------------------------
4117 // |                               | RET | args + receiver |  caller frame |
4118 //                                 ^ esp                                   ^ ebp
4119 
4120 // --{ ret #A+1 }-----------------------------------------------------------
4121 // |                                                       |  caller frame |
4122 //                                                         ^ esp           ^ ebp
4123 
// Runtime function calls are accomplished by doing a stub call to the
// CEntry (a real code object). On IA32, this passes the arguments on the
// stack, the number of arguments in EAX, the address of the runtime function
// in EBX, and the context in ESI.
4128 
4129 // --{ before the call instruction }--------------------------------------------
4130 //                                                         |  caller frame |
4131 //                                                         ^ esp           ^ ebp
4132 
4133 // --{ push arguments and setup EAX, EBX, and ESI }-----------------------------
4134 //                                       | args + receiver |  caller frame |
4135 //                                       ^ esp                             ^ ebp
4136 //              [eax = #args, ebx = runtime function, esi = context]
4137 
4138 // --{ call #CEntry }-----------------------------------------------------------
4139 //                                 | RET | args + receiver |  caller frame |
4140 //                                 ^ esp                                   ^ ebp
4141 
4142 // =={ body of runtime function }===============================================
4143 
4144 // --{ runtime returns }--------------------------------------------------------
4145 //                                                         |  caller frame |
4146 //                                                         ^ esp           ^ ebp
4147 
4148 // Other custom linkages (e.g. for calling directly into and out of C++) may
4149 // need to save callee-saved registers on the stack, which is done in the
4150 // function prologue of generated code.
4151 
4152 // --{ before the call instruction }--------------------------------------------
4153 //                                                         |  caller frame |
4154 //                                                         ^ esp           ^ ebp
4155 
// --{ set up arguments in registers and on stack }-----------------------------
4157 //                                                  | args |  caller frame |
4158 //                                                  ^ esp                  ^ ebp
4159 //                  [r0 = arg0, r1 = arg1, ...]
4160 
4161 // --{ call code }--------------------------------------------------------------
4162 //                                            | RET | args |  caller frame |
4163 //                                            ^ esp                        ^ ebp
4164 
4165 // =={ prologue of called function }============================================
4166 // --{ push ebp }---------------------------------------------------------------
4167 //                                       | FP | RET | args |  caller frame |
4168 //                                       ^ esp                             ^ ebp
4169 
4170 // --{ mov ebp, esp }-----------------------------------------------------------
4171 //                                       | FP | RET | args |  caller frame |
4172 //                                       ^ ebp,esp
4173 
4174 // --{ save registers }---------------------------------------------------------
4175 //                                | regs | FP | RET | args |  caller frame |
4176 //                                ^ esp  ^ ebp
4177 
4178 // --{ subi esp, #N }-----------------------------------------------------------
4179 //                 | callee frame | regs | FP | RET | args |  caller frame |
4180 //                 ^esp                  ^ ebp
4181 
4182 // =={ body of called function }================================================
4183 
4184 // =={ epilogue of called function }============================================
4185 // --{ restore registers }------------------------------------------------------
4186 //                                | regs | FP | RET | args |  caller frame |
4187 //                                ^ esp  ^ ebp
4188 
4189 // --{ mov esp, ebp }-----------------------------------------------------------
4190 //                                       | FP | RET | args |  caller frame |
4191 //                                       ^ esp,ebp
4192 
4193 // --{ pop ebp }----------------------------------------------------------------
4194 //                                            | RET | args |  caller frame |
4195 //                                            ^ esp                        ^ ebp
4196 
FinishFrame(Frame * frame)4197 void CodeGenerator::FinishFrame(Frame* frame) {
4198   auto call_descriptor = linkage()->GetIncomingDescriptor();
4199   const RegList saves = call_descriptor->CalleeSavedRegisters();
4200   if (saves != 0) {  // Save callee-saved registers.
4201     DCHECK(!info()->is_osr());
4202     int pushed = 0;
4203     for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
4204       if (!((1 << i) & saves)) continue;
4205       ++pushed;
4206     }
4207     frame->AllocateSavedCalleeRegisterSlots(pushed);
4208   }
4209 }
4210 
AssembleConstructFrame()4211 void CodeGenerator::AssembleConstructFrame() {
4212   auto call_descriptor = linkage()->GetIncomingDescriptor();
4213   if (frame_access_state()->has_frame()) {
4214     if (call_descriptor->IsCFunctionCall()) {
4215       __ push(ebp);
4216       __ mov(ebp, esp);
4217     } else if (call_descriptor->IsJSFunctionCall()) {
4218       __ Prologue();
4219       if (call_descriptor->PushArgumentCount()) {
4220         __ push(kJavaScriptCallArgCountRegister);
4221       }
4222     } else {
4223       __ StubPrologue(info()->GetOutputStackFrameType());
4224       if (call_descriptor->IsWasmFunctionCall()) {
4225         __ push(kWasmInstanceRegister);
4226       }
4227     }
4228   }
4229 
4230   int shrink_slots = frame()->GetTotalFrameSlotCount() -
4231                      call_descriptor->CalculateFixedFrameSize();
4232 
4233   if (info()->is_osr()) {
4234     // TurboFan OSR-compiled functions cannot be entered directly.
4235     __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
4236 
4237     // Unoptimized code jumps directly to this entrypoint while the unoptimized
4238     // frame is still on the stack. Optimized code uses OSR values directly from
4239     // the unoptimized frame. Thus, all that needs to be done is to allocate the
4240     // remaining stack slots.
4241     if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
4242     osr_pc_offset_ = __ pc_offset();
4243     shrink_slots -= osr_helper()->UnoptimizedFrameSlots();
4244     ResetSpeculationPoison();
4245   }
4246 
4247   const RegList saves = call_descriptor->CalleeSavedRegisters();
4248   if (shrink_slots > 0) {
4249     DCHECK(frame_access_state()->has_frame());
4250     if (info()->IsWasm() && shrink_slots > 128) {
4251       // For WebAssembly functions with big frames we have to do the stack
4252       // overflow check before we construct the frame. Otherwise we may not
4253       // have enough space on the stack to call the runtime for the stack
4254       // overflow.
4255       Label done;
4256 
4257       // If the frame is bigger than the stack, we throw the stack overflow
4258       // exception unconditionally. Thereby we can avoid the integer overflow
4259       // check in the condition code.
4260       if (shrink_slots * kPointerSize < FLAG_stack_size * 1024) {
4261         Register scratch = esi;
4262         __ push(scratch);
4263         __ mov(scratch,
4264                FieldOperand(kWasmInstanceRegister,
4265                             WasmInstanceObject::kRealStackLimitAddressOffset));
4266         __ mov(scratch, Operand(scratch, 0));
4267         __ add(scratch, Immediate(shrink_slots * kPointerSize));
4268         __ cmp(esp, scratch);
4269         __ pop(scratch);
4270         __ j(above_equal, &done);
4271       }
4272       __ mov(ecx, FieldOperand(kWasmInstanceRegister,
4273                                WasmInstanceObject::kCEntryStubOffset));
4274       __ Move(esi, Smi::kZero);
4275       __ CallRuntimeWithCEntry(Runtime::kThrowWasmStackOverflow, ecx);
4276       ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
4277       RecordSafepoint(reference_map, Safepoint::kSimple, 0,
4278                       Safepoint::kNoLazyDeopt);
4279       __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
4280       __ bind(&done);
4281     }
4282 
4283     // Skip callee-saved and return slots, which are created below.
4284     shrink_slots -= base::bits::CountPopulation(saves);
4285     shrink_slots -= frame()->GetReturnSlotCount();
4286     if (shrink_slots > 0) {
4287       __ sub(esp, Immediate(shrink_slots * kPointerSize));
4288     }
4289   }
4290 
4291   if (saves != 0) {  // Save callee-saved registers.
4292     DCHECK(!info()->is_osr());
4293     for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
4294       if (((1 << i) & saves)) __ push(Register::from_code(i));
4295     }
4296   }
4297 
4298   // Allocate return slots (located after callee-saved).
4299   if (frame()->GetReturnSlotCount() > 0) {
4300     __ sub(esp, Immediate(frame()->GetReturnSlotCount() * kPointerSize));
4301   }
4302 }
4303 
AssembleReturn(InstructionOperand * pop)4304 void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
4305   auto call_descriptor = linkage()->GetIncomingDescriptor();
4306 
4307   const RegList saves = call_descriptor->CalleeSavedRegisters();
4308   // Restore registers.
4309   if (saves != 0) {
4310     const int returns = frame()->GetReturnSlotCount();
4311     if (returns != 0) {
4312       __ add(esp, Immediate(returns * kPointerSize));
4313     }
4314     for (int i = 0; i < Register::kNumRegisters; i++) {
4315       if (!((1 << i) & saves)) continue;
4316       __ pop(Register::from_code(i));
4317     }
4318   }
4319 
4320   // Might need ecx for scratch if pop_size is too big or if there is a variable
4321   // pop count.
4322   DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & ecx.bit());
4323   size_t pop_size = call_descriptor->StackParameterCount() * kPointerSize;
4324   IA32OperandConverter g(this, nullptr);
4325   if (call_descriptor->IsCFunctionCall()) {
4326     AssembleDeconstructFrame();
4327   } else if (frame_access_state()->has_frame()) {
4328     // Canonicalize JSFunction return sites for now if they always have the same
4329     // number of return args.
4330     if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
4331       if (return_label_.is_bound()) {
4332         __ jmp(&return_label_);
4333         return;
4334       } else {
4335         __ bind(&return_label_);
4336         AssembleDeconstructFrame();
4337       }
4338     } else {
4339       AssembleDeconstructFrame();
4340     }
4341   }
4342   DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & edx.bit());
4343   DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & ecx.bit());
4344   if (pop->IsImmediate()) {
4345     DCHECK_EQ(Constant::kInt32, g.ToConstant(pop).type());
4346     pop_size += g.ToConstant(pop).ToInt32() * kPointerSize;
4347     __ Ret(static_cast<int>(pop_size), ecx);
4348   } else {
4349     Register pop_reg = g.ToRegister(pop);
4350     Register scratch_reg = pop_reg == ecx ? edx : ecx;
4351     __ pop(scratch_reg);
4352     __ lea(esp, Operand(esp, pop_reg, times_4, static_cast<int>(pop_size)));
4353     __ jmp(scratch_reg);
4354   }
4355 }
4356 
FinishCode()4357 void CodeGenerator::FinishCode() {}
4358 
AssembleMove(InstructionOperand * source,InstructionOperand * destination)4359 void CodeGenerator::AssembleMove(InstructionOperand* source,
4360                                  InstructionOperand* destination) {
4361   IA32OperandConverter g(this, nullptr);
4362   // Dispatch on the source and destination operand kinds.
4363   switch (MoveType::InferMove(source, destination)) {
4364     case MoveType::kRegisterToRegister:
4365       if (source->IsRegister()) {
4366         __ mov(g.ToRegister(destination), g.ToRegister(source));
4367       } else {
4368         DCHECK(source->IsFPRegister());
4369         __ movaps(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
4370       }
4371       return;
4372     case MoveType::kRegisterToStack: {
4373       Operand dst = g.ToOperand(destination);
4374       if (source->IsRegister()) {
4375         __ mov(dst, g.ToRegister(source));
4376       } else {
4377         DCHECK(source->IsFPRegister());
4378         XMMRegister src = g.ToDoubleRegister(source);
4379         MachineRepresentation rep =
4380             LocationOperand::cast(source)->representation();
4381         if (rep == MachineRepresentation::kFloat32) {
4382           __ movss(dst, src);
4383         } else if (rep == MachineRepresentation::kFloat64) {
4384           __ movsd(dst, src);
4385         } else {
4386           DCHECK_EQ(MachineRepresentation::kSimd128, rep);
4387           __ movups(dst, src);
4388         }
4389       }
4390       return;
4391     }
4392     case MoveType::kStackToRegister: {
4393       Operand src = g.ToOperand(source);
4394       if (source->IsStackSlot()) {
4395         __ mov(g.ToRegister(destination), src);
4396       } else {
4397         DCHECK(source->IsFPStackSlot());
4398         XMMRegister dst = g.ToDoubleRegister(destination);
4399         MachineRepresentation rep =
4400             LocationOperand::cast(source)->representation();
4401         if (rep == MachineRepresentation::kFloat32) {
4402           __ movss(dst, src);
4403         } else if (rep == MachineRepresentation::kFloat64) {
4404           __ movsd(dst, src);
4405         } else {
4406           DCHECK_EQ(MachineRepresentation::kSimd128, rep);
4407           __ movups(dst, src);
4408         }
4409       }
4410       return;
4411     }
4412     case MoveType::kStackToStack: {
4413       Operand src = g.ToOperand(source);
4414       Operand dst = g.ToOperand(destination);
4415       if (source->IsStackSlot()) {
4416         __ push(src);
4417         __ pop(dst);
4418       } else {
4419         MachineRepresentation rep =
4420             LocationOperand::cast(source)->representation();
4421         if (rep == MachineRepresentation::kFloat32) {
4422           __ movss(kScratchDoubleReg, src);
4423           __ movss(dst, kScratchDoubleReg);
4424         } else if (rep == MachineRepresentation::kFloat64) {
4425           __ movsd(kScratchDoubleReg, src);
4426           __ movsd(dst, kScratchDoubleReg);
4427         } else {
4428           DCHECK_EQ(MachineRepresentation::kSimd128, rep);
4429           __ movups(kScratchDoubleReg, src);
4430           __ movups(dst, kScratchDoubleReg);
4431         }
4432       }
4433       return;
4434     }
4435     case MoveType::kConstantToRegister: {
4436       Constant src = g.ToConstant(source);
4437       if (destination->IsRegister()) {
4438         Register dst = g.ToRegister(destination);
4439         if (src.type() == Constant::kHeapObject) {
4440           __ Move(dst, src.ToHeapObject());
4441         } else {
4442           __ Move(dst, g.ToImmediate(source));
4443         }
4444       } else {
4445         DCHECK(destination->IsFPRegister());
4446         XMMRegister dst = g.ToDoubleRegister(destination);
4447         if (src.type() == Constant::kFloat32) {
4448           // TODO(turbofan): Can we do better here?
4449           __ Move(dst, src.ToFloat32AsInt());
4450         } else {
4451           DCHECK_EQ(src.type(), Constant::kFloat64);
4452           __ Move(dst, src.ToFloat64().AsUint64());
4453         }
4454       }
4455       return;
4456     }
4457     case MoveType::kConstantToStack: {
4458       Constant src = g.ToConstant(source);
4459       Operand dst = g.ToOperand(destination);
4460       if (destination->IsStackSlot()) {
4461         if (src.type() == Constant::kHeapObject) {
4462           __ mov(dst, src.ToHeapObject());
4463         } else {
4464           __ Move(dst, g.ToImmediate(source));
4465         }
4466       } else {
4467         DCHECK(destination->IsFPStackSlot());
4468         if (src.type() == Constant::kFloat32) {
4469           __ Move(dst, Immediate(src.ToFloat32AsInt()));
4470         } else {
4471           DCHECK_EQ(src.type(), Constant::kFloat64);
4472           uint64_t constant_value = src.ToFloat64().AsUint64();
4473           uint32_t lower = static_cast<uint32_t>(constant_value);
4474           uint32_t upper = static_cast<uint32_t>(constant_value >> 32);
4475           Operand dst0 = dst;
4476           Operand dst1 = g.ToOperand(destination, kPointerSize);
4477           __ Move(dst0, Immediate(lower));
4478           __ Move(dst1, Immediate(upper));
4479         }
4480       }
4481       return;
4482     }
4483   }
4484   UNREACHABLE();
4485 }
4486 
4487 
AssembleSwap(InstructionOperand * source,InstructionOperand * destination)4488 void CodeGenerator::AssembleSwap(InstructionOperand* source,
4489                                  InstructionOperand* destination) {
4490   IA32OperandConverter g(this, nullptr);
4491   // Dispatch on the source and destination operand kinds.  Not all
4492   // combinations are possible.
4493   switch (MoveType::InferSwap(source, destination)) {
4494     case MoveType::kRegisterToRegister: {
4495       if (source->IsRegister()) {
4496         Register src = g.ToRegister(source);
4497         Register dst = g.ToRegister(destination);
4498         __ push(src);
4499         __ mov(src, dst);
4500         __ pop(dst);
4501       } else {
4502         DCHECK(source->IsFPRegister());
4503         XMMRegister src = g.ToDoubleRegister(source);
4504         XMMRegister dst = g.ToDoubleRegister(destination);
4505         __ movaps(kScratchDoubleReg, src);
4506         __ movaps(src, dst);
4507         __ movaps(dst, kScratchDoubleReg);
4508       }
4509       return;
4510     }
4511     case MoveType::kRegisterToStack: {
4512       if (source->IsRegister()) {
4513         Register src = g.ToRegister(source);
4514         __ push(src);
4515         frame_access_state()->IncreaseSPDelta(1);
4516         Operand dst = g.ToOperand(destination);
4517         __ mov(src, dst);
4518         frame_access_state()->IncreaseSPDelta(-1);
4519         dst = g.ToOperand(destination);
4520         __ pop(dst);
4521       } else {
4522         DCHECK(source->IsFPRegister());
4523         XMMRegister src = g.ToDoubleRegister(source);
4524         Operand dst = g.ToOperand(destination);
4525         MachineRepresentation rep =
4526             LocationOperand::cast(source)->representation();
4527         if (rep == MachineRepresentation::kFloat32) {
4528           __ movss(kScratchDoubleReg, dst);
4529           __ movss(dst, src);
4530           __ movaps(src, kScratchDoubleReg);
4531         } else if (rep == MachineRepresentation::kFloat64) {
4532           __ movsd(kScratchDoubleReg, dst);
4533           __ movsd(dst, src);
4534           __ movaps(src, kScratchDoubleReg);
4535         } else {
4536           DCHECK_EQ(MachineRepresentation::kSimd128, rep);
4537           __ movups(kScratchDoubleReg, dst);
4538           __ movups(dst, src);
4539           __ movups(src, kScratchDoubleReg);
4540         }
4541       }
4542       return;
4543     }
4544     case MoveType::kStackToStack: {
4545       if (source->IsStackSlot()) {
4546         Operand dst1 = g.ToOperand(destination);
4547         __ push(dst1);
4548         frame_access_state()->IncreaseSPDelta(1);
4549         Operand src1 = g.ToOperand(source);
4550         __ push(src1);
4551         Operand dst2 = g.ToOperand(destination);
4552         __ pop(dst2);
4553         frame_access_state()->IncreaseSPDelta(-1);
4554         Operand src2 = g.ToOperand(source);
4555         __ pop(src2);
4556       } else {
4557         DCHECK(source->IsFPStackSlot());
4558         Operand src0 = g.ToOperand(source);
4559         Operand dst0 = g.ToOperand(destination);
4560         MachineRepresentation rep =
4561             LocationOperand::cast(source)->representation();
4562         if (rep == MachineRepresentation::kFloat32) {
4563           __ movss(kScratchDoubleReg, dst0);  // Save dst in scratch register.
4564           __ push(src0);  // Then use stack to copy src to destination.
4565           __ pop(dst0);
4566           __ movss(src0, kScratchDoubleReg);
4567         } else if (rep == MachineRepresentation::kFloat64) {
4568           __ movsd(kScratchDoubleReg, dst0);  // Save dst in scratch register.
4569           __ push(src0);  // Then use stack to copy src to destination.
4570           __ pop(dst0);
4571           __ push(g.ToOperand(source, kPointerSize));
4572           __ pop(g.ToOperand(destination, kPointerSize));
4573           __ movsd(src0, kScratchDoubleReg);
4574         } else {
4575           DCHECK_EQ(MachineRepresentation::kSimd128, rep);
4576           __ movups(kScratchDoubleReg, dst0);  // Save dst in scratch register.
4577           __ push(src0);  // Then use stack to copy src to destination.
4578           __ pop(dst0);
4579           __ push(g.ToOperand(source, kPointerSize));
4580           __ pop(g.ToOperand(destination, kPointerSize));
4581           __ push(g.ToOperand(source, 2 * kPointerSize));
4582           __ pop(g.ToOperand(destination, 2 * kPointerSize));
4583           __ push(g.ToOperand(source, 3 * kPointerSize));
4584           __ pop(g.ToOperand(destination, 3 * kPointerSize));
4585           __ movups(src0, kScratchDoubleReg);
4586         }
4587       }
4588       return;
4589     }
4590     default:
4591       UNREACHABLE();
4592       break;
4593   }
4594 }
4595 
4596 
AssembleJumpTable(Label ** targets,size_t target_count)4597 void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
4598   for (size_t index = 0; index < target_count; ++index) {
4599     __ dd(targets[index]);
4600   }
4601 }
4602 
// Retire the helper macros at the end of the file. `__` and
// kScratchDoubleReg are defined near the top of this file; the ASSEMBLE_*
// helpers are presumably defined earlier in this file as well.
#undef ASSEMBLE_ATOMIC_BINOP
#undef ASSEMBLE_BINOP
#undef ASSEMBLE_COMPARE
#undef ASSEMBLE_I64ATOMIC_BINOP
#undef ASSEMBLE_IEEE754_BINOP
#undef ASSEMBLE_IEEE754_UNOP
#undef ASSEMBLE_MOVX
#undef ASSEMBLE_SIMD_IMM_SHUFFLE
#undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
#undef kScratchDoubleReg
#undef __
4614 
4615 }  // namespace compiler
4616 }  // namespace internal
4617 }  // namespace v8
4618