1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef BERBERIS_ASSEMBLER_COMMON_X86_H_
18 #define BERBERIS_ASSEMBLER_COMMON_X86_H_
19 
20 #include <cstddef>  // std::size_t
21 #include <cstdint>
22 #include <type_traits>  // std::enable_if, std::is_integral
23 
24 #include "berberis/assembler/common.h"
25 #include "berberis/base/bit_util.h"
26 #include "berberis/base/checks.h"
27 #include "berberis/base/macros.h"  // DISALLOW_IMPLICIT_CONSTRUCTORS
28 
29 namespace berberis {
30 
31 // AssemblerX86 includes implementation of most x86 assembler instructions.
32 //
33 // x86-32 and x86-64 assemblers are nearly identical, but difference lies in handling
34 // of very low-level instruction details: almost all instructions on x86-64 could include
35 // REX byte which is needed if new registers (%r8 to %r15 or %xmm8 to %xmm15) are used.
36 //
37 // To handle that difference efficiently AssemblerX86 is CRTP class: it's parameterized
38 // by its own descendant and pull certain functions (e.g. GetHighBit or Rex8Size) from
39 // its implementation.
40 //
41 // Certain functions are only implemented by its descendant (since there are instructions
42 // which only exist in x86-32 mode and instructions which only exist in x86-64 mode).
43 
44 template <typename Assembler>
45 class AssemblerX86 : public AssemblerBase {
46  public:
AssemblerX86(MachineCode * code)47   explicit AssemblerX86(MachineCode* code) : AssemblerBase(code) {}
48 
// Condition codes accepted by conditional instructions.
//
// Values 0x0-0xf are OR-ed directly into the emitted opcode byte (see Jcc).  kAlways and
// kNever are pseudo-conditions: Jcc turns kAlways into an unconditional Jmp and emits
// nothing at all for kNever.
enum class Condition {
  kInvalidCondition = -1,

  kOverflow = 0x0,
  kNoOverflow = 0x1,
  kBelow = 0x2,
  kAboveEqual = 0x3,
  kEqual = 0x4,
  kNotEqual = 0x5,
  kBelowEqual = 0x6,
  kAbove = 0x7,
  kNegative = 0x8,
  kPositive = 0x9,
  kParityEven = 0xa,
  kParityOdd = 0xb,
  kLess = 0xc,
  kGreaterEqual = 0xd,
  kLessEqual = 0xe,
  kGreater = 0xf,
  kAlways = 0x10,
  kNever = 0x11,

  // Alternative spellings of the encodings above.
  kCarry = kBelow,
  kNotCarry = kAboveEqual,
  kZero = kEqual,
  kNotZero = kNotEqual,
  kSign = kNegative,
  kNotSign = kPositive
};
79 
// General-purpose register handle.  Treat it as an opaque cookie: compare it, pass it around,
// but do not read or write |num| directly.
struct Register {
  // |num| cannot be private because of how constexpr static class members are initialized, see
  // https://stackoverflow.com/questions/24527395/compiler-error-when-initializing-constexpr-static-class-member
  uint8_t num;

  // Two handles are equal exactly when their register numbers match.
  constexpr bool operator==(const Register& reg) const { return num == reg.num; }

  constexpr bool operator!=(const Register& reg) const { return !(*this == reg); }
};
92 
// x87 stack register handle (st0..st7).  Treat it as an opaque cookie: compare it, pass it
// around, but do not read or write |num| directly.
struct X87Register {
  // Note: |num| cannot be private because of peculiarities of C++ (see
  // https://stackoverflow.com/questions/24527395/compiler-error-when-initializing-constexpr-static-class-member
  // for explanation).

  // Comparisons take another X87Register.  They previously took a general-purpose Register,
  // which made "st0 == st1" ill-formed and silently allowed comparing against an unrelated
  // register file.
  constexpr bool operator==(const X87Register& reg) const { return num == reg.num; }

  constexpr bool operator!=(const X87Register& reg) const { return num != reg.num; }

  uint8_t num;
};

// Named x87 registers; |st| is an alias for the stack top (same number as |st0|).
static constexpr X87Register st{0};
static constexpr X87Register st0{0};
static constexpr X87Register st1{1};
static constexpr X87Register st2{2};
static constexpr X87Register st3{3};
static constexpr X87Register st4{4};
static constexpr X87Register st5{5};
static constexpr X87Register st6{6};
static constexpr X87Register st7{7};
115 
// XMM (vector) register handle.  Treat it as an opaque cookie: compare it, pass it around,
// but do not read or write |num| directly.
struct XMMRegister {
  // |num| cannot be private because of how constexpr static class members are initialized, see
  // https://stackoverflow.com/questions/24527395/compiler-error-when-initializing-constexpr-static-class-member
  uint8_t num;

  // Two handles are equal exactly when their register numbers match.
  constexpr bool operator==(const XMMRegister& reg) const { return num == reg.num; }

  constexpr bool operator!=(const XMMRegister& reg) const { return !(*this == reg); }
};
128 
// Scale applied to the index register of an Operand.  Each enumerator's value is the base-2
// logarithm of the multiplier in its name.
enum ScaleFactor {
  kTimesOne = 0,
  kTimesTwo = 1,
  kTimesFour = 2,
  kTimesEight = 3
};
130 
131   struct Operand {
rexOperand132     constexpr uint8_t rex() const {
133       return Assembler::kIsX86_64 ? ((index.num & 0x08) >> 2) | ((base.num & 0x08) >> 3) : 0;
134     }
135 
RequiresRexOperand136     constexpr bool RequiresRex() const {
137       return Assembler::kIsX86_64 ? ((index.num & 0x08) | (base.num & 0x08)) : false;
138     }
139 
140     Register base = Assembler::no_register;
141     Register index = Assembler::no_register;
142     ScaleFactor scale = kTimesOne;
143     int32_t disp = 0;
144   };
145 
// Argument wrapper that refers to a Label by reference.
// NOTE(review): only a reference is stored, so presumably the Label must outlive this
// wrapper - confirm against callers.
146   struct LabelOperand {
147     const Label& label;
148   };
149 
150   // Macro operations.
Finalize()151   void Finalize() { ResolveJumps(); }
152 
P2Align(uint32_t m)153   void P2Align(uint32_t m) {
154     uint32_t mask = m - 1;
155     uint32_t addr = pc();
156     Nop((m - (addr & mask)) & mask);
157   }
158 
Nop(uint32_t bytes)159   void Nop(uint32_t bytes) {
160     static const uint32_t kNumNops = 15;
161     static const uint8_t nop1[] = {0x90};
162     static const uint8_t nop2[] = {0x66, 0x90};
163     static const uint8_t nop3[] = {0x0f, 0x1f, 0x00};
164     static const uint8_t nop4[] = {0x0f, 0x1f, 0x40, 0x00};
165     static const uint8_t nop5[] = {0x0f, 0x1f, 0x44, 0x00, 0x00};
166     static const uint8_t nop6[] = {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x0};
167     static const uint8_t nop7[] = {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x0, 0x00};
168     static const uint8_t nop8[] = {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
169     static const uint8_t nop9[] = {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
170     static const uint8_t nop10[] = {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
171     static const uint8_t nop11[] = {
172         0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
173     static const uint8_t nop12[] = {
174         0x66, 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
175     static const uint8_t nop13[] = {
176         0x66, 0x66, 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
177     static const uint8_t nop14[] = {
178         0x66, 0x66, 0x66, 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
179     static const uint8_t nop15[] = {
180         0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
181 
182     static const uint8_t* nops[kNumNops] = {nop1,
183                                             nop2,
184                                             nop3,
185                                             nop4,
186                                             nop5,
187                                             nop6,
188                                             nop7,
189                                             nop8,
190                                             nop9,
191                                             nop10,
192                                             nop11,
193                                             nop12,
194                                             nop13,
195                                             nop14,
196                                             nop15};
197     // Common case.
198     if (bytes == 1) {
199       Emit8(nop1[0]);
200       return;
201     }
202 
203     while (bytes > 0) {
204       uint32_t len = bytes;
205       if (len > kNumNops) {
206         len = kNumNops;
207       }
208       EmitSequence(nops[len - 1], len);
209       bytes -= len;
210     }
211   }
212 
213 // Instructions.
214 #include "berberis/assembler/gen_assembler_common_x86-inl.h"  // NOLINT generated file
215 
216   // Flow control.
Jmp(int32_t offset)217   void Jmp(int32_t offset) {
218     CHECK_GE(offset, INT32_MIN + 2);
219     int32_t short_offset = offset - 2;
220     if (IsInRange<int8_t>(short_offset)) {
221       Emit8(0xeb);
222       Emit8(static_cast<int8_t>(short_offset));
223     } else {
224       CHECK_GE(offset, INT32_MIN + 5);
225       Emit8(0xe9);
226       Emit32(offset - 5);
227     }
228   }
229 
Call(int32_t offset)230   void Call(int32_t offset) {
231     CHECK_GE(offset, INT32_MIN + 5);
232     Emit8(0xe8);
233     Emit32(offset - 5);
234   }
235 
Jcc(Condition cc,int32_t offset)236   void Jcc(Condition cc, int32_t offset) {
237     if (cc == Condition::kAlways) {
238       Jmp(offset);
239       return;
240     }
241     if (cc == Condition::kNever) {
242       return;
243     }
244     CHECK_EQ(0, static_cast<uint8_t>(cc) & 0xf0);
245     CHECK_GE(offset, INT32_MIN + 2);
246     int32_t short_offset = offset - 2;
247     if (IsInRange<int8_t>(short_offset)) {
248       Emit8(0x70 | static_cast<uint8_t>(cc));
249       Emit8(static_cast<int8_t>(short_offset));
250     } else {
251       CHECK_GE(offset, INT32_MIN + 6);
252       Emit8(0x0f);
253       Emit8(0x80 | static_cast<uint8_t>(cc));
254       Emit32(offset - 6);
255     }
256   }
257 
258  protected:
259   // Helper types to distinguish argument types.
260   struct Register8Bit {
Register8BitRegister8Bit261     explicit constexpr Register8Bit(Register reg) : num(reg.num) {}
262     uint8_t num;
263   };
264 
265   struct Register32Bit {
Register32BitRegister32Bit266     explicit constexpr Register32Bit(Register reg) : num(reg.num) {}
Register32BitRegister32Bit267     explicit constexpr Register32Bit(XMMRegister reg) : num(reg.num) {}
268     uint8_t num;
269   };
270 
271   // 16-bit and 128-bit vector registers follow the same rules as 32-bit registers.
272   using Register16Bit = Register32Bit;
273   using VectorRegister128Bit = Register32Bit;
274   // Certain instructions (Enter/Leave, Jcc/Jmp/Loop, Call/Ret, Push/Pop) always operate
275   // on registers of default size (32-bit in 32-bit mode, 64-bit in 64-bit mode (see
276   // "Instructions Not Requiring REX Prefix in 64-Bit Mode" table in 24594 AMD Manual)
277   // Map these to Register32Bit, too, since they don't need REX.W even in 64-bit mode.
278   //
279   // x87 instructions fall into that category, too, since they were not expanded in x86-64 mode.
280   using RegisterDefaultBit = Register32Bit;
281 
282   struct Memory32Bit {
Memory32BitMemory32Bit283     explicit Memory32Bit(const Operand& op) : operand(op) {}
284     Operand operand;
285   };
286 
287   // 8-bit, 16-bit, 128-bit memory behave the same as 32-bit memory.
288   // Only 64-bit memory is different.
289   using Memory8Bit = Memory32Bit;
290   using Memory16Bit = Memory32Bit;
291   // X87 instructions always use the same encoding - even for 64-bit or 28-bytes
292   // memory operands (like in fldenv/fnstenv)
293   using MemoryX87 = Memory32Bit;
294   using MemoryX8716Bit = Memory32Bit;
295   using MemoryX8732Bit = Memory32Bit;
296   using MemoryX8764Bit = Memory32Bit;
297   using MemoryX8780Bit = Memory32Bit;
298   // Most vector instructions don't need to use REX.W to access 64-bit or 128-bit memory.
299   using VectorMemory32Bit = Memory32Bit;
300   using VectorMemory64Bit = Memory32Bit;
301   using VectorMemory128Bit = Memory32Bit;
302 
303   // Labels types for memory quantities.  Note that names are similar to the ones before because
304   // they are autogenerated.  E.g. VectorLabel32Bit should be read as “VECTOR's operation LABEL
305   // for 32-BIT quantity in memory”.
306   struct Label32Bit {
Label32BitLabel32Bit307     explicit Label32Bit(const struct LabelOperand& l) : label(l.label) {}
308     const Label& label;
309   };
310 
311   // 8-bit, 16-bit, 128-bit memory behave the same as 32-bit memory.
312   // Only 64-bit memory is different.
313   using Label8Bit = Label32Bit;
314   using Label16Bit = Label32Bit;
315   // X87 instructions always use the same encoding - even for 64-bit or 28-bytes
316   // memory operands (like in fldenv/fnstenv)
317   using LabelX87 = Label32Bit;
318   using LabelX8716Bit = Label32Bit;
319   using LabelX8732Bit = Label32Bit;
320   using LabelX8764Bit = Label32Bit;
321   using LabelX8780Bit = Label32Bit;
322   // Most vector instructions don't need to use REX.W to access 64-bit or 128-bit memory.
323   using VectorLabel32Bit = Label32Bit;
324   using VectorLabel64Bit = Label32Bit;
325   using VectorLabel128Bit = Label32Bit;
326 
// Returns true when |code| is one of the legacy prefix bytes recognized by this assembler.
static constexpr bool IsLegacyPrefix(int code) {
  switch (code) {
    // Legacy prefixes that SSE reuses as opcode extensions.
    case 0x66:
    case 0xf2:
    case 0xf3:
    // Lock prefix; cmpxchg uses it.
    case 0xf0:
      return true;
    default:
      return false;
  }
}
332 
333   // Delegate check to Assembler::template IsRegister.
334   template <typename ArgumentType>
335   struct IsCondition {
336     static constexpr bool value = std::is_same_v<ArgumentType, Condition>;
337   };
338 
339   template <typename ArgumentType>
340   struct IsRegister {
341     static constexpr bool value = Assembler::template IsRegister<ArgumentType>::value ||
342                                   std::is_same_v<ArgumentType, X87Register>;
343   };
344 
345   template <typename ArgumentType>
346   struct IsMemoryOperand {
347     static constexpr bool value = Assembler::template IsMemoryOperand<ArgumentType>::value;
348   };
349 
350   template <typename ArgumentType>
351   struct IsLabelOperand {
352     static constexpr bool value = Assembler::template IsLabelOperand<ArgumentType>::value;
353   };
354 
// Predicate for argument classification: true for integral types whose size matches int8_t,
// int16_t, int32_t or int64_t.
template <typename ArgumentType>
struct IsImmediate {
  static constexpr bool value =
      std::is_integral_v<ArgumentType> &&
      (sizeof(ArgumentType) == sizeof(int64_t) || sizeof(ArgumentType) == sizeof(int32_t) ||
       sizeof(ArgumentType) == sizeof(int16_t) || sizeof(ArgumentType) == sizeof(int8_t));
};
362 
// Number of types in ArgumentTypes for which Predicate<T>::value is true.
template <template <typename> typename Predicate, typename... ArgumentTypes>
static constexpr std::size_t kCountArguments =
    (std::size_t{0} + ... + (Predicate<ArgumentTypes>::value ? std::size_t{1} : std::size_t{0}));
367 
368   // Extract arguments selected by Predicate.
369   //
370   // Note: This interface begs for the trick used in EmitFunctionTypeHelper in make_intrinsics.cc
371   // in conjunction with structured bindings.
372   //
373   // Unfortunately returning std::tuple slows down AssemblerTest by about 30% when libc++ and clang
374   // are used together (no slowdown on GCC, no slowdown on clang+libstdc++).
375   //
376   // TODO(http://b/140721204): refactor when it would be safe to return std::tuple from function.
377   //
// Returns (by value) the index-th argument, counting only arguments whose decayed type
// satisfies Predicate.  Compilation fails when there is no such argument.
template <std::size_t index,
          template <typename>
          typename Predicate,
          typename ArgumentType,
          typename... ArgumentTypes>
static constexpr auto ArgumentByType(ArgumentType argument, ArgumentTypes... arguments) {
  constexpr bool kSelected = Predicate<std::decay_t<ArgumentType>>::value;
  if constexpr (!kSelected) {
    // Not a candidate: keep looking for the same index in the tail.
    return ArgumentByType<index, Predicate>(arguments...);
  } else if constexpr (index != 0) {
    // A candidate, but an earlier one than requested: skip it.
    return ArgumentByType<index - 1, Predicate>(arguments...);
  } else {
    return argument;
  }
}
394 
// Emit immediates - they always come at the end and don't affect anything except
// rip-addressing.
//
// End of recursion: no arguments left.
static constexpr void EmitImmediates() {}

// Walks the argument list in order.  Integral arguments of 1, 2, 4 or 8 bytes are written
// with Emit8/Emit16/Emit32/Emit64; every other argument is skipped.
template <typename FirstArgumentType, typename... ArgumentTypes>
void EmitImmediates(FirstArgumentType first_argument, ArgumentTypes... other_arguments) {
  if constexpr (std::is_integral_v<FirstArgumentType>) {
    constexpr auto kWidth = sizeof(FirstArgumentType);
    if constexpr (kWidth == sizeof(int8_t)) {
      Emit8(first_argument);
    } else if constexpr (kWidth == sizeof(int16_t)) {
      Emit16(first_argument);
    } else if constexpr (kWidth == sizeof(int32_t)) {
      Emit32(first_argument);
    } else if constexpr (kWidth == sizeof(int64_t)) {
      Emit64(first_argument);
    }
  }
  EmitImmediates(other_arguments...);
}
415 
// Number of bytes ArgumentType occupies as an immediate: its size for 1-, 2-, 4- and 8-byte
// integral types, 0 for non-integral types.  Integral types of any other size are rejected at
// compile time.
template <typename ArgumentType>
static constexpr size_t ImmediateSize() {
  if constexpr (!std::is_integral_v<ArgumentType>) {
    return 0;
  } else {
    constexpr size_t kBytes = sizeof(ArgumentType);
    static_assert(kBytes == sizeof(int8_t) || kBytes == sizeof(int16_t) ||
                  kBytes == sizeof(int32_t) || kBytes == sizeof(int64_t));
    return kBytes;
  }
}

// Total number of immediate bytes contributed by all of ArgumentTypes.
template <typename... ArgumentTypes>
static constexpr size_t ImmediatesSize() {
  return (size_t{0} + ... + ImmediateSize<ArgumentTypes>());
}
439 
// Empty tag type that carries an instruction's opcode bytes as template arguments.
template <uint8_t... kOpcodes>
struct Opcodes {};

// Number of bytes in the list.
template <uint8_t... kBytes>
static constexpr size_t OpcodesCount(Opcodes<kBytes...>) {
  return sizeof...(kBytes);
}

// First byte of a non-empty list.
template <uint8_t kHead, uint8_t... kTail>
static constexpr uint8_t FirstOpcode(Opcodes<kHead, kTail...>) {
  return kHead;
}

// The list without its first byte.
template <uint8_t kHead, uint8_t... kTail>
static constexpr auto SkipFirstOpcodeFromType(Opcodes<kHead, kTail...>) {
  using Rest = Opcodes<kTail...>;
  return Rest{};
}
458 
459   template <uint8_t kOpcode, uint8_t... kOpcodes>
EmitLegacyPrefixes(Opcodes<kOpcode,kOpcodes...> opcodes)460   auto EmitLegacyPrefixes(Opcodes<kOpcode, kOpcodes...> opcodes) {
461     if constexpr (IsLegacyPrefix(kOpcode)) {
462       Emit8(kOpcode);
463       return EmitLegacyPrefixes(Opcodes<kOpcodes...>{});
464     } else {
465       return opcodes;
466     }
467   }
468 
469   // Note: We may need separate x87 EmitInstruction if we would want to support
470   // full set of x86 instructions.
471   //
472   // That's because 8087 was completely separate piece of silicon which was only
473   // partially driven by 8086:
474   //     https://en.wikipedia.org/wiki/Intel_8087
475   //
476   // In particular it had the following properties:
477   //   1. It had its own separate subset of opcodes - because it did its own decoding.
478   //   2. It had separate set of registers and could *only* access these.
479   //   2a. The 8086, in turn, *couldn't* access these registers at all.
480   //   3. To access memory it was designed to take address from address bus.
481   //
482   // This means that:
483   //   1. x87 instructions are easily recognizable - all instructions with opcodes 0xd8
484   //      to 0xdf are x87 instructions, all instructions with other opcodes are not.
485   //   2. We could be sure that x87 registers would only be used with x87 instructions
486   //      and other types of registers wouldn't be used with these.
487   //   3. We still would use normal registers for memory access, but REX.W bit wouldn't
488   //      be used for 64-bit quantities, whether they are floating point numbers or integers.
489   //
490   // Right now we only use EmitInstruction to emit x87 instructions which are using memory
491   // operands - and it works well enough for that because of #3.
492 
493   // If you want to understand how this function works (and how helper function like Vex and
494   // Rex work), you need good understanding of AMD/Intel Instruction format.
495   //
496   // Intel manual includes the most precise explanation, but it's VERY hard to read.
497   //
498   // AMD manual is much easier to read, but it doesn't include description of EVEX
499   // instructions and is less precise. Diagram on page 2 of Volume 3 is especially helpful:
500   //   https://www.amd.com/system/files/TechDocs/24594.pdf#page=42
501   //
502   // And the most concise (albeit unofficial) is on osdev Wiki:
503   //   https://wiki.osdev.org/X86-64_Instruction_Encoding
504 
505   // Note: if you change this function (or any of the helper functions) then remove --fast
506   // option from ExhaustiveAssemblerTest to run full blackbox comparison to clang.
507 
// Emits one complete instruction from its compile-time opcode list and its run-time arguments.
//
// InstructionOpcodes is an Opcodes<...> type that lists the instruction bytes in order:
// optional legacy prefixes (see IsLegacyPrefix), then either a three-byte vex/xop header
// (first byte 0xC4 or 0x8F) or an optional 0x0F / 0x0F 0x38 / 0x0F 0x3A escape, then the main
// opcode, and finally at most one extra byte (either an opcode extension placed in the ModRM
// reg field or a trailing opcode byte emitted where an immediate would go).
//
// Arguments are classified with IsRegister, IsMemoryOperand, IsLabelOperand, IsCondition and
// IsImmediate; the per-kind counts must add up to the number of arguments (static_assert
// below).  At most one memory operand and at most one label are accepted.
//
// Emission order: legacy prefixes, vex/xop (EmitVex) or rex (EmitRex) prefix, escape bytes,
// main opcode (OR-ed with the condition code when a Condition is passed), ModRM/operand
// bytes, then the trailing opcode byte or the immediates.
508   template <typename InstructionOpcodes, typename... ArgumentsTypes>
EmitInstruction(ArgumentsTypes...arguments)509   void EmitInstruction(ArgumentsTypes... arguments) {
510     auto opcodes_no_prefixes = EmitLegacyPrefixes(InstructionOpcodes{});
511     // We don't yet support any XOP-encoded instructions, but they are 100% identical to vex ones,
512     // except they are using 0x8F prefix, not 0xC4 prefix.
        //
        // vex_xop is true when at least three opcode bytes remain after the legacy prefixes and
        // the first of them is 0xC4 or 0x8F.
513     constexpr auto vex_xop = [&](auto opcodes) {
514       if constexpr (OpcodesCount(opcodes) < 3) {
515         return false;
516       // Note that JSON files use AMD approach: bytes are specified as in AMD manual (only we are
517       // replacing ¬R/¬X/¬B and vvvv bits with zeros).
518       //
519       // In particular it means that vex-encoded instructions should be specified with 0xC4 even if
520       // they are always emitted with 0xC4-to-0xC5 folding.
521       } else if constexpr (FirstOpcode(opcodes) == 0xC4 || FirstOpcode(opcodes) == 0x8F) {
522         return true;
523       }
524       return false;
525     }(opcodes_no_prefixes);
526     constexpr auto conditions_count = kCountArguments<IsCondition, ArgumentsTypes...>;
527     constexpr auto operands_count = kCountArguments<IsMemoryOperand, ArgumentsTypes...>;
528     constexpr auto labels_count = kCountArguments<IsLabelOperand, ArgumentsTypes...>;
529     constexpr auto registers_count = kCountArguments<IsRegister, ArgumentsTypes...>;
530     // We need to know if Reg field (in ModRM byte) is an opcode extension or if opcode extension
531     // goes into the immediate field.
532     constexpr auto reg_is_opcode_extension =
533         (registers_count + operands_count > 0) &&
534         (registers_count + operands_count + labels_count <
535          2 + vex_xop * (OpcodesCount(opcodes_no_prefixes) - 4));
536     static_assert((registers_count + operands_count + labels_count + conditions_count +
537                    kCountArguments<IsImmediate, ArgumentsTypes...>) == sizeof...(ArgumentsTypes),
538                   "Only registers (with specified size), Operands (with specified size), "
539                   "Conditions, and Immediates are supported.");
540     static_assert(operands_count <= 1, "Only one operand is allowed in instruction.");
541     static_assert(labels_count <= 1, "Only one label is allowed in instruction.");
542     // 0x0f is an opcode extension, if it's not there then we only have one byte opcode.
        //
        // The lambda below emits the vex/xop or rex prefix plus any escape bytes and returns the
        // opcode bytes that are still left to emit.
543     auto opcodes_no_prefixes_no_opcode_extension = [&](auto opcodes) {
544       if constexpr (vex_xop) {
545         static_assert(conditions_count == 0,
546                       "No conditionals are supported in vex/xop instructions.");
547         static_assert((registers_count + operands_count + labels_count) <= 4,
548                       "Up to four-arguments in vex/xop instructions are supported.");
549         constexpr auto vex_xop_byte1 = FirstOpcode(opcodes);
550         constexpr auto vex_xop_byte2 = FirstOpcode(SkipFirstOpcodeFromType(opcodes));
551         constexpr auto vex_xop_byte3 =
552             FirstOpcode(SkipFirstOpcodeFromType(SkipFirstOpcodeFromType(opcodes)));
553         static_cast<Assembler*>(this)
554             ->template EmitVex<vex_xop_byte1,
555                                vex_xop_byte2,
556                                vex_xop_byte3,
557                                reg_is_opcode_extension>(arguments...);
558         return SkipFirstOpcodeFromType(SkipFirstOpcodeFromType(SkipFirstOpcodeFromType(opcodes)));
559       } else {
560         static_assert(conditions_count <= 1, "Only one condition is allowed in instruction.");
561         static_assert((registers_count + operands_count + labels_count) <= 2,
562                       "Only two-arguments legacy instructions are supported.");
563         static_cast<Assembler*>(this)->EmitRex(arguments...);
564         if constexpr (FirstOpcode(opcodes) == 0x0F) {
565           Emit8(0x0F);
566           auto opcodes_no_prefixes_no_opcode_0x0F_extension = SkipFirstOpcodeFromType(opcodes);
567           if constexpr (FirstOpcode(opcodes_no_prefixes_no_opcode_0x0F_extension) == 0x38) {
568             Emit8(0x38);
569             return SkipFirstOpcodeFromType(opcodes_no_prefixes_no_opcode_0x0F_extension);
570           } else if constexpr (FirstOpcode(opcodes_no_prefixes_no_opcode_0x0F_extension) == 0x3A) {
571             Emit8(0x3A);
572             return SkipFirstOpcodeFromType(opcodes_no_prefixes_no_opcode_0x0F_extension);
573           } else {
574             return opcodes_no_prefixes_no_opcode_0x0F_extension;
575           }
576         } else {
577           return opcodes;
578         }
579       }
580     }(opcodes_no_prefixes);
581     // These are older 8086 instructions which encode register number in the opcode itself.
582     if constexpr (registers_count == 1 && operands_count == 0 && labels_count == 0 &&
583                   OpcodesCount(opcodes_no_prefixes_no_opcode_extension) == 1) {
584       static_cast<Assembler*>(this)->EmitRegisterInOpcode(
585           FirstOpcode(opcodes_no_prefixes_no_opcode_extension),
586           ArgumentByType<0, IsRegister>(arguments...));
587       EmitImmediates(arguments...);
588     } else {
589       // Emit "main" single-byte opcode.
590       if constexpr (conditions_count == 1) {
591         auto condition_code = static_cast<uint8_t>(ArgumentByType<0, IsCondition>(arguments...));
592         CHECK_EQ(0, condition_code & 0xF0);
593         Emit8(FirstOpcode(opcodes_no_prefixes_no_opcode_extension) | condition_code);
594       } else {
595         Emit8(FirstOpcode(opcodes_no_prefixes_no_opcode_extension));
596       }
597       auto extra_opcodes = SkipFirstOpcodeFromType(opcodes_no_prefixes_no_opcode_extension);
          // First branch: the remaining opcode byte is passed as the first argument of
          // EmitOperandOp/EmitRipOp/EmitModRM.  Second branch: the first register argument is
          // passed in that position instead.
598       if constexpr (reg_is_opcode_extension) {
599         if constexpr (operands_count == 1) {
600           static_cast<Assembler*>(this)->EmitOperandOp(
601               static_cast<int>(FirstOpcode(extra_opcodes)),
602               ArgumentByType<0, IsMemoryOperand>(arguments...).operand);
603         } else if constexpr (labels_count == 1) {
604           static_cast<Assembler*>(this)->template EmitRipOp<ImmediatesSize<ArgumentsTypes...>()>(
605               static_cast<int>(FirstOpcode(extra_opcodes)),
606               ArgumentByType<0, IsLabelOperand>(arguments...).label);
607         } else {
608           static_cast<Assembler*>(this)->EmitModRM(this->FirstOpcode(extra_opcodes),
609                                                    ArgumentByType<0, IsRegister>(arguments...));
610         }
611       } else if constexpr (registers_count > 0) {
612         if constexpr (operands_count == 1) {
613           static_cast<Assembler*>(this)->EmitOperandOp(
614               ArgumentByType<0, IsRegister>(arguments...),
615               ArgumentByType<0, IsMemoryOperand>(arguments...).operand);
616         } else if constexpr (labels_count == 1) {
617           static_cast<Assembler*>(this)->template EmitRipOp<ImmediatesSize<ArgumentsTypes...>()>(
618               ArgumentByType<0, IsRegister>(arguments...),
619               ArgumentByType<0, IsLabelOperand>(arguments...).label);
620         } else {
621           static_cast<Assembler*>(this)->EmitModRM(ArgumentByType<0, IsRegister>(arguments...),
622                                                    ArgumentByType<1, IsRegister>(arguments...));
623         }
624       }
625       // If reg is an opcode extension then we already used that element.
626       if constexpr (reg_is_opcode_extension) {
627         static_assert(OpcodesCount(extra_opcodes) == 1);
628       } else if constexpr (OpcodesCount(extra_opcodes) > 0) {
629         // Final opcode byte(s) - they are in the place where immediate is expected.
630         // Cmpsps/Cmppd and 3DNow! instructions are using it.
631         static_assert(OpcodesCount(extra_opcodes) == 1);
632         Emit8(FirstOpcode(extra_opcodes));
633       }
          // With four register/operand/label arguments the number of the last register is
          // written into the high nibble of one trailing byte; a single immediate, if present,
          // is OR-ed into that same byte instead of being emitted on its own.
634       if constexpr (registers_count + operands_count + labels_count == 4) {
635         if constexpr (kCountArguments<IsImmediate, ArgumentsTypes...> == 1) {
636           Emit8((ArgumentByType<registers_count - 1, IsRegister>(arguments...).num << 4) |
637                 ArgumentByType<0, IsImmediate>(arguments...));
638         } else {
639           static_assert(kCountArguments<IsImmediate, ArgumentsTypes...> == 0);
640           Emit8(ArgumentByType<registers_count - 1, IsRegister>(arguments...).num << 4);
641         }
642       } else {
643         EmitImmediates(arguments...);
644       }
645     }
646   }
647 
648   void ResolveJumps();
649 
650  private:
651   DISALLOW_IMPLICIT_CONSTRUCTORS(AssemblerX86);
652 };
653 
654 // Return the reverse condition.
// Returns the opposite condition.  |cond| must not be kInvalidCondition.
template <typename Condition>
inline constexpr Condition ToReverseCond(Condition cond) {
  CHECK(cond != Condition::kInvalidCondition);
  // Conditions are numbered so that a condition and its opposite differ only in the least
  // significant bit.
  const int encoding = static_cast<int>(cond);
  return static_cast<Condition>(encoding ^ 1);
}
662 
// Returns a short printable name for |cond|, or "??" for any value that is not one of the
// sixteen conditions listed below (kAlways, kNever and kInvalidCondition included).
template <typename Condition>
inline constexpr const char* GetCondName(Condition cond) {
  struct NamedCondition {
    Condition value;
    const char* name;
  };
  const NamedCondition kNames[] = {
      {Condition::kOverflow, "O"},
      {Condition::kNoOverflow, "NO"},
      {Condition::kBelow, "B"},
      {Condition::kAboveEqual, "AE"},
      {Condition::kEqual, "Z"},
      {Condition::kNotEqual, "NZ"},
      {Condition::kBelowEqual, "BE"},
      {Condition::kAbove, "A"},
      {Condition::kNegative, "N"},
      {Condition::kPositive, "PL"},
      {Condition::kParityEven, "PE"},
      {Condition::kParityOdd, "PO"},
      {Condition::kLess, "LS"},
      {Condition::kGreaterEqual, "GE"},
      {Condition::kLessEqual, "LE"},
      {Condition::kGreater, "GT"},
  };
  for (const NamedCondition& entry : kNames) {
    if (entry.value == cond) {
      return entry.name;
    }
  }
  return "??";
}
702 
703 template <typename Assembler>
Pmov(XMMRegister dest,XMMRegister src)704 inline void AssemblerX86<Assembler>::Pmov(XMMRegister dest, XMMRegister src) {
705   // SSE does not have operations for register-to-register integer move and
706   // Intel explicitly recommends to use pshufd instead on Pentium4:
707   //   See https://software.intel.com/en-us/articles/
708   //               fast-simd-integer-move-for-the-intel-pentiumr-4-processor
709   // These recommendations are CPU-dependent, though, thus we will need to
710   // investigate this question further before we could decide when to use
711   // movaps (or movapd) and when to use pshufd.
712   //
713   // TODO(khim): investigate performance problems related to integer MOVs
714   Movaps(dest, src);
715 }
716 
717 template <typename Assembler>
Call(const Label & label)718 inline void AssemblerX86<Assembler>::Call(const Label& label) {
719   if (label.IsBound()) {
720     int32_t offset = label.position() - pc();
721     Call(offset);
722   } else {
723     Emit8(0xe8);
724     Emit32(0xfffffffc);
725     jumps_.push_back(Jump{&label, pc() - 4, false});
726   }
727 }
728 
729 template <typename Assembler>
Jcc(Condition cc,const Label & label)730 inline void AssemblerX86<Assembler>::Jcc(Condition cc, const Label& label) {
731   if (cc == Condition::kAlways) {
732     Jmp(label);
733     return;
734   } else if (cc == Condition::kNever) {
735     return;
736   }
737   CHECK_EQ(0, static_cast<uint8_t>(cc) & 0xF0);
738   // TODO(eaeltsin): may be remove IsBound case?
739   // Then jcc by label will be of fixed size (5 bytes)
740   if (label.IsBound()) {
741     int32_t offset = label.position() - pc();
742     Jcc(cc, offset);
743   } else {
744     Emit16(0x800f | (static_cast<uint8_t>(cc) << 8));
745     Emit32(0xfffffffc);
746     jumps_.push_back(Jump{&label, pc() - 4, false});
747   }
748 }
749 
750 template <typename Assembler>
Jmp(const Label & label)751 inline void AssemblerX86<Assembler>::Jmp(const Label& label) {
752   // TODO(eaeltsin): may be remove IsBound case?
753   // Then jmp by label will be of fixed size (5 bytes)
754   if (label.IsBound()) {
755     int32_t offset = label.position() - pc();
756     Jmp(offset);
757   } else {
758     Emit8(0xe9);
759     Emit32(0xfffffffc);
760     jumps_.push_back(Jump{&label, pc() - 4, false});
761   }
762 }
763 
764 template <typename Assembler>
ResolveJumps()765 inline void AssemblerX86<Assembler>::ResolveJumps() {
766   for (const auto& jump : jumps_) {
767     const Label* label = jump.label;
768     uint32_t pc = jump.pc;
769     CHECK(label->IsBound());
770     if (jump.is_recovery) {
771       // Add pc -> label correspondence to recovery map.
772       AddRelocation(0, RelocationType::RelocRecoveryPoint, pc, label->position());
773     } else {
774       int32_t offset = label->position() - pc;
775       *AddrAs<int32_t>(pc) += offset;
776     }
777   }
778 }
779 
780 // Code size optimized instructions: they have different variants depending on registers used.
781 
782 template <typename Assembler>
Xchgl(Register dest,Register src)783 inline void AssemblerX86<Assembler>::Xchgl(Register dest, Register src) {
784   if (Assembler::IsAccumulator(src) || Assembler::IsAccumulator(dest)) {
785     Register other = Assembler::IsAccumulator(src) ? dest : src;
786     EmitInstruction<Opcodes<0x90>>(Register32Bit(other));
787   } else {
788     // Clang 8 (after r330298) puts dest before src.  We are comparing output
789     // to clang in exhaustive test thus we want to match clang behavior exactly.
790     EmitInstruction<Opcodes<0x87>>(Register32Bit(dest), Register32Bit(src));
791   }
792 }
793 
794 }  // namespace berberis
795 
796 #endif  // BERBERIS_ASSEMBLER_COMMON_X86_H_
797