1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef BERBERIS_INTRINSICS_COMMON_TO_X86_TEXT_ASSEMBLER_COMMON_H_
18 #define BERBERIS_INTRINSICS_COMMON_TO_X86_TEXT_ASSEMBLER_COMMON_H_
19 
#include <array>
#include <cinttypes>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <deque>
#include <string>
#include <tuple>
#include <type_traits>

#include "berberis/base/checks.h"
#include "berberis/base/config.h"
#include "berberis/base/macros.h"  // DISALLOW_IMPLICIT_CONSTRUCTORS
29 
30 namespace berberis {
31 
namespace constants_pool {

// Only declared in this header; the definition lives elsewhere.
//
// Called while formatting an Operand that has neither a base nor an index
// register: the operand's displacement is passed as `address` and the returned
// value is emitted as an offset added to the macroassembler-constants register.
// NOTE(review): presumably maps the address of a constant to its offset inside
// the constants pool - confirm against the definition.
int32_t GetOffset(int32_t address);

}  // namespace constants_pool
37 
38 template <typename Assembler>
39 class TextAssemblerX86 {
40  public:
41   // Condition class - 16 x86 conditions.
42   enum class Condition {
43     kOverflow = 0,
44     kNoOverflow = 1,
45     kBelow = 2,
46     kAboveEqual = 3,
47     kEqual = 4,
48     kNotEqual = 5,
49     kBelowEqual = 6,
50     kAbove = 7,
51     kNegative = 8,
52     kPositive = 9,
53     kParityEven = 10,
54     kParityOdd = 11,
55     kLess = 12,
56     kGreaterEqual = 13,
57     kLessEqual = 14,
58     kGreater = 15,
59 
60     // aka...
61     kCarry = kBelow,
62     kNotCarry = kAboveEqual,
63     kZero = kEqual,
64     kNotZero = kNotEqual,
65     kSign = kNegative,
66     kNotSign = kPositive
67   };
68 
69   enum ScaleFactor {
70     kTimesOne = 0,
71     kTimesTwo = 1,
72     kTimesFour = 2,
73     kTimesEight = 3,
74     // All our target systems use 32-bit pointers.
75     kTimesPointerSize = kTimesFour
76   };
77 
78   struct Label {
79     size_t id;
80     bool bound = false;
81 
82     template <typename MacroAssembler>
ToGasArgumentLabel83     friend std::string ToGasArgument(const Label& label, MacroAssembler*) {
84       return std::to_string(label.id) + (label.bound ? "b" : "f");
85     }
86   };
87 
88   struct Operand;
89 
90   class Register {
91    public:
Register()92     constexpr Register() : arg_no_(kNoRegister) {}
Register(int arg_no)93     constexpr Register(int arg_no) : arg_no_(arg_no) {}
arg_no()94     int arg_no() const {
95       CHECK_NE(arg_no_, kNoRegister);
96       return arg_no_;
97     }
98 
99     constexpr bool operator==(const Register& other) const { return arg_no() == other.arg_no(); }
100     constexpr bool operator!=(const Register& other) const { return arg_no() != other.arg_no(); }
101 
102     static constexpr int kNoRegister = -1;
103     static constexpr int kStackPointer = -2;
104     // Used in Operand to deal with references to scratch area.
105     static constexpr int kScratchPointer = -3;
106 
107    private:
108     friend struct Operand;
109 
110     // Register number created during creation of assembler call.
111     // See arg['arm_register'] in _gen_c_intrinsic_body in gen_intrinsics.py
112     //
113     // Default value (-1) means it's not assigned yet (thus couldn't be used).
114     int arg_no_;
115   };
116 
117   class X87Register {
118    public:
X87Register()119     constexpr X87Register() : arg_no_(kNoRegister) {}
X87Register(int arg_no)120     constexpr X87Register(int arg_no) : arg_no_(arg_no) {}
arg_no()121     int arg_no() const {
122       CHECK_NE(arg_no_, kNoRegister);
123       return arg_no_;
124     }
125 
126     constexpr bool operator==(const X87Register& other) const { return arg_no_ == other.arg_no_; }
127     constexpr bool operator!=(const X87Register& other) const { return arg_no_ != other.arg_no_; }
128 
129     template <typename MacroAssembler>
ToGasArgument(const X87Register & reg,MacroAssembler *)130     friend const std::string ToGasArgument(const X87Register& reg, MacroAssembler*) {
131       return '%' + std::to_string(reg.arg_no());
132     }
133 
134    private:
135     // Register number created during creation of assembler call.
136     // See arg['arm_register'] in _gen_c_intrinsic_body in gen_intrinsics.py
137     //
138     // Default value (-1) means it's not assigned yet (thus couldn't be used).
139     static constexpr int kNoRegister = -1;
140     int arg_no_;
141   };
142 
143   class XMMRegister {
144    public:
XMMRegister()145     constexpr XMMRegister() : arg_no_(kNoRegister) {}
XMMRegister(int arg_no)146     constexpr XMMRegister(int arg_no) : arg_no_(arg_no) {}
arg_no()147     int arg_no() const {
148       CHECK_NE(arg_no_, kNoRegister);
149       return arg_no_;
150     }
151 
152     constexpr bool operator==(const XMMRegister& other) const { return arg_no() == other.arg_no(); }
153     constexpr bool operator!=(const XMMRegister& other) const { return arg_no() != other.arg_no(); }
154 
155     template <typename MacroAssembler>
ToGasArgument(const XMMRegister & reg,MacroAssembler *)156     friend const std::string ToGasArgument(const XMMRegister& reg, MacroAssembler*) {
157       return '%' + std::to_string(reg.arg_no());
158     }
159 
160    private:
161     // Register number created during creation of assembler call.
162     // See arg['arm_register'] in _gen_c_intrinsic_body in gen_intrinsics.py
163     //
164     // Default value (-1) means it's not assigned yet (thus couldn't be used).
165     static constexpr int kNoRegister = -1;
166     int arg_no_;
167   };
168 
169   struct Operand {
170     Register base = Register{};
171     Register index = Register{};
172     ScaleFactor scale = kTimesOne;
173     int32_t disp = 0;
174 
175     template <typename MacroAssembler>
ToGasArgumentOperand176     friend const std::string ToGasArgument(const Operand& op, MacroAssembler* as) {
177       std::string result{};
178       if (op.base.arg_no_ == Register::kNoRegister and op.index.arg_no_ == Register::kNoRegister) {
179         as->need_gpr_macroassembler_constants_ = true;
180         result = std::to_string(constants_pool::GetOffset(op.disp)) + " + " +
181                  ToGasArgument(
182                      typename Assembler::RegisterDefaultBit(as->gpr_macroassembler_constants), as);
183       } else if (op.base.arg_no_ == Register::kScratchPointer) {
184         CHECK(op.index.arg_no_ == Register::kNoRegister);
185         // Only support two pointers to scratch area for now.
186         if (op.disp == 0) {
187           result = '%' + std::to_string(as->gpr_macroassembler_scratch.arg_no());
188         } else if (op.disp == config::kScratchAreaSlotSize) {
189           result = '%' + std::to_string(as->gpr_macroassembler_scratch2.arg_no());
190         } else {
191           FATAL("Only two scratch registers are supported for now");
192         }
193       } else {
194         if (op.base.arg_no_ != Register::kNoRegister) {
195           result = ToGasArgument(typename Assembler::RegisterDefaultBit(op.base), as);
196         }
197         if (op.index.arg_no_ != Register::kNoRegister) {
198           result += ',' + ToGasArgument(typename Assembler::RegisterDefaultBit(op.index), as) +
199                     ',' + std::to_string(1 << op.scale);
200         }
201         result = '(' + result + ')';
202         if (op.disp) {
203           result = std::to_string(op.disp) + result;
204         }
205       }
206       return result;
207     }
208   };
209 
TextAssemblerX86(int indent,FILE * out)210   TextAssemblerX86(int indent, FILE* out) : indent_(indent), out_(out) {}
211 
212   Register gpr_a{};
213   Register gpr_c{};
214   Register gpr_d{};
215   // Note: stack pointer is not reflected in list of arguments, intrinsics use
216   // it implicitly.
217   Register gpr_s{Register::kStackPointer};
218   // Used in Operand as pseudo-register to temporary operand.
219   Register gpr_scratch{Register::kScratchPointer};
220 
221   // In x86-64 case we could refer to kBerberisMacroAssemblerConstants via %rip.
222   // In x86-32 mode, on the other hand, we need complex dance to access it via GOT.
223   // Intrinsics which use these constants receive it via additional parameter - and
224   // we need to know if it's needed or not.
225   Register gpr_macroassembler_constants{};
need_gpr_macroassembler_constants()226   bool need_gpr_macroassembler_constants() const { return need_gpr_macroassembler_constants_; }
227 
228   Register gpr_macroassembler_scratch{};
need_gpr_macroassembler_scratch()229   bool need_gpr_macroassembler_scratch() const { return need_gpr_macroassembler_scratch_; }
230   Register gpr_macroassembler_scratch2{};
231 
232   bool need_avx = false;
233   bool need_bmi = false;
234   bool need_bmi2 = false;
235   bool need_fma = false;
236   bool need_fma4 = false;
237   bool need_lzcnt = false;
238   bool need_popcnt = false;
239   bool need_sse3 = false;
240   bool need_ssse3 = false;
241   bool need_sse4_1 = false;
242   bool need_sse4_2 = false;
243 
Bind(Label * label)244   void Bind(Label* label) {
245     CHECK_EQ(label->bound, false);
246     fprintf(out_, "%*s\"%zd:\\n\"\n", indent_ + 2, "", label->id);
247     label->bound = true;
248   }
249 
MakeLabel()250   Label* MakeLabel() {
251     labels_allocated_.push_back({labels_allocated_.size()});
252     return &labels_allocated_.back();
253   }
254 
255   template <typename... Args>
Byte(Args...args)256   void Byte(Args... args) {
257     static_assert((std::is_same_v<Args, uint8_t> && ...));
258     bool print_kwd = true;
259     fprintf(out_, "%*s\"", indent_ + 2, "");
260     (fprintf(out_, "%s%" PRIu8, print_kwd ? print_kwd = false, ".byte " : ", ", args), ...);
261     fprintf(out_, "\\n\"\n");
262   }
263 
264   template <typename... Args>
TwoByte(Args...args)265   void TwoByte(Args... args) {
266     static_assert((std::is_same_v<Args, uint16_t> && ...));
267     bool print_kwd = true;
268     fprintf(out_, "%*s\"", indent_ + 2, "");
269     (fprintf(out_, "%s%" PRIu16, print_kwd ? print_kwd = false, ".2byte " : ", ", args), ...);
270     fprintf(out_, "\\n\"\n");
271   }
272 
273   template <typename... Args>
FourByte(Args...args)274   void FourByte(Args... args) {
275     static_assert((std::is_same_v<Args, uint32_t> && ...));
276     bool print_kwd = true;
277     fprintf(out_, "%*s\"", indent_ + 2, "");
278     (fprintf(out_, "%s%" PRIu32, print_kwd ? print_kwd = false, ".4byte " : ", ", args), ...);
279     fprintf(out_, "\\n\"\n");
280   }
281 
282   template <typename... Args>
EigthByte(Args...args)283   void EigthByte(Args... args) {
284     static_assert((std::is_same_v<Args, uint64_t> && ...));
285     bool print_kwd = true;
286     fprintf(out_, "%*s\"", indent_ + 2, "");
287     (fprintf(out_, "%s%" PRIu64, print_kwd ? print_kwd = false, ".8byte " : ", ", args), ...);
288     fprintf(out_, "\\n\"\n");
289   }
290 
P2Align(uint32_t m)291   void P2Align(uint32_t m) {
292     fprintf(out_, "%*s\".p2align %u\\n\"\n", indent_ + 2, "", m);
293   }
294 
295 // Instructions.
296 #include "gen_text_assembler_common_x86-inl.h"  // NOLINT generated file
297 
298  protected:
299   bool need_gpr_macroassembler_constants_ = false;
300   bool need_gpr_macroassembler_scratch_ = false;
301 
302   template <const char* kSpPrefix, char kRegisterPrefix>
303   class RegisterTemplate {
304    public:
RegisterTemplate(Register reg)305     explicit RegisterTemplate(Register reg) : reg_(reg) {}
306 
307     template <typename MacroAssembler>
ToGasArgument(const RegisterTemplate & reg,MacroAssembler *)308     friend const std::string ToGasArgument(const RegisterTemplate& reg, MacroAssembler*) {
309       if (reg.reg_.arg_no() == Register::kStackPointer) {
310         return kSpPrefix;
311       } else {
312         if (kRegisterPrefix) {
313           return std::string({'%', kRegisterPrefix}) + std::to_string(reg.reg_.arg_no());
314         } else {
315           return '%' + std::to_string(reg.reg_.arg_no());
316         }
317       }
318     }
319 
320    private:
321     Register reg_;
322   };
323 
324   constexpr static char kSpl[] = "%%spl";
325   using Register8Bit = RegisterTemplate<kSpl, 'b'>;
326   constexpr static char kSp[] = "%%sp";
327   using Register16Bit = RegisterTemplate<kSp, 'w'>;
328   constexpr static char kEsp[] = "%%esp";
329   using Register32Bit = RegisterTemplate<kEsp, 'k'>;
330   constexpr static char kRsp[] = "%%rsp";
331   using Register64Bit = RegisterTemplate<kRsp, 'q'>;
332 
SetRequiredFeatureAVX()333   void SetRequiredFeatureAVX() {
334     need_avx = true;
335     SetRequiredFeatureSSE4_2();
336   }
337 
SetRequiredFeatureBMI()338   void SetRequiredFeatureBMI() {
339     need_bmi = true;
340   }
341 
SetRequiredFeatureBMI2()342   void SetRequiredFeatureBMI2() {
343     need_bmi2 = true;
344   }
345 
SetRequiredFeatureFMA()346   void SetRequiredFeatureFMA() {
347     need_fma = true;
348     SetRequiredFeatureAVX();
349   }
350 
SetRequiredFeatureFMA4()351   void SetRequiredFeatureFMA4() {
352     need_fma4 = true;
353     SetRequiredFeatureAVX();
354   }
355 
SetRequiredFeatureLZCNT()356   void SetRequiredFeatureLZCNT() {
357     need_lzcnt = true;
358   }
359 
SetRequiredFeaturePOPCNT()360   void SetRequiredFeaturePOPCNT() {
361     need_popcnt = true;
362   }
363 
SetRequiredFeatureSSE3()364   void SetRequiredFeatureSSE3() {
365     need_sse3 = true;
366     // Note: we assume that SSE2 is always available thus we don't have have_sse2 or have_sse1
367     // variables.
368   }
369 
SetRequiredFeatureSSSE3()370   void SetRequiredFeatureSSSE3() {
371     need_ssse3 = true;
372     SetRequiredFeatureSSE3();
373   }
374 
SetRequiredFeatureSSE4_1()375   void SetRequiredFeatureSSE4_1() {
376     need_sse4_1 = true;
377     SetRequiredFeatureSSSE3();
378   }
379 
SetRequiredFeatureSSE4_2()380   void SetRequiredFeatureSSE4_2() {
381     need_sse4_2 = true;
382     SetRequiredFeatureSSE4_1();
383   }
384 
385   template <typename... Args>
386   void Instruction(const char* name, Condition cond, const Args&... args);
387 
388   template <typename... Args>
389   void Instruction(const char* name, const Args&... args);
390 
EmitString()391   void EmitString() {}
392 
EmitString(const std::string & s)393   void EmitString(const std::string& s) { fprintf(out_, "%s", s.c_str()); }
394 
395   template <typename... Args>
EmitString(const std::string & s,const Args &...args)396   void EmitString(const std::string& s, const Args&... args) {
397     EmitString(args...);
398     fprintf(out_, ", %s", s.c_str());
399   }
400 
401  protected:
402   int indent_;
403   FILE* out_;
404 
405  private:
406   std::deque<Label> labels_allocated_;
407 
408   DISALLOW_IMPLICIT_CONSTRUCTORS(TextAssemblerX86);
409 };
410 
// Fallback formatter for arguments that have no dedicated ToGasArgument
// overload: the value is converted with std::to_string and prefixed with '$'.
template <typename Arg, typename MacroAssembler>
inline std::string ToGasArgument(const Arg& arg, MacroAssembler*) {
  std::string immediate{"$"};
  immediate += std::to_string(arg);
  return immediate;
}
415 
416 template <typename Assembler>
417 template <typename... Args>
Instruction(const char * name,Condition cond,const Args &...args)418 inline void TextAssemblerX86<Assembler>::Instruction(const char* name, Condition cond, const Args&... args) {
419   char name_with_condition[8] = {};
420   if (strcmp(name, "Cmovl") == 0 || strcmp(name, "Cmovq") == 0) {
421     strcpy(name_with_condition, "Cmov");
422   } else if (strcmp(name, "Jcc") == 0) {
423     strcpy(name_with_condition, "J");
424   } else {
425     CHECK(strcmp(name, "Setcc") == 0);
426     strcpy(name_with_condition, "Set");
427   }
428   switch (cond) {
429     case Condition::kOverflow:
430       strcat(name_with_condition, "o");
431       break;
432     case Condition::kNoOverflow:
433       strcat(name_with_condition, "no");
434       break;
435     case Condition::kBelow:
436       strcat(name_with_condition, "b");
437       break;
438     case Condition::kAboveEqual:
439       strcat(name_with_condition, "ae");
440       break;
441     case Condition::kEqual:
442       strcat(name_with_condition, "e");
443       break;
444     case Condition::kNotEqual:
445       strcat(name_with_condition, "ne");
446       break;
447     case Condition::kBelowEqual:
448       strcat(name_with_condition, "be");
449       break;
450     case Condition::kAbove:
451       strcat(name_with_condition, "a");
452       break;
453     case Condition::kNegative:
454       strcat(name_with_condition, "s");
455       break;
456     case Condition::kPositive:
457       strcat(name_with_condition, "ns");
458       break;
459     case Condition::kParityEven:
460       strcat(name_with_condition, "p");
461       break;
462     case Condition::kParityOdd:
463       strcat(name_with_condition, "np");
464       break;
465     case Condition::kLess:
466       strcat(name_with_condition, "l");
467       break;
468     case Condition::kGreaterEqual:
469       strcat(name_with_condition, "ge");
470       break;
471     case Condition::kLessEqual:
472       strcat(name_with_condition, "le");
473       break;
474     case Condition::kGreater:
475       strcat(name_with_condition, "g");
476       break;
477   }
478   Instruction(name_with_condition, args...);
479 }
480 
481 template <typename Assembler>
482 template <typename... Args>
Instruction(const char * name,const Args &...args)483 inline void TextAssemblerX86<Assembler>::Instruction(const char* name, const Args&... args) {
484   for (auto it : std::array<std::tuple<const char*, const char*>, 18>{
485            {// Note: SSE doesn't include simple register-to-register move instruction.
486             // You are supposed to use one of half-dozen variants depending on what you
487             // are doing.
488             //
489             // Pseudoinstructions with embedded "lock" prefix.
490             {"LockCmpXchg8b", "Lock; CmppXchg8b"},
491             {"LockCmpXchg16b", "Lock; CmppXchg16b"},
492             {"LockCmpXchgb", "Lock; CmppXchgb"},
493             {"LockCmpXchgl", "Lock; CmppXchgl"},
494             {"LockCmpXchgq", "Lock; CmppXchgq"},
495             {"LockCmpXchgw", "Lock; CmppXchgq"},
496             // Our assembler has Pmov instruction which is supposed to pick the best
497             // option - but currently we just map Pmov to Movaps.
498             {"Pmov", "Movaps"},
499             // These instructions use different names in our assembler than in GNU AS.
500             {"Movdq", "Movaps"},
501             {"Movsxbl", "Movsbl"},
502             {"Movsxbq", "Movsbq"},
503             {"Movsxwl", "Movswl"},
504             {"Movsxwq", "Movswq"},
505             {"Movsxlq", "Movslq"},
506             {"Movzxbl", "Movzbl"},
507             {"Movzxbq", "Movzbq"},
508             {"Movzxwl", "Movzwl"},
509             {"Movzxwq", "Movzwq"},
510             {"Movzxlq", "Movzlq"}}}) {
511     if (strcmp(name, std::get<0>(it)) == 0) {
512       name = std::get<1>(it);
513       break;
514     }
515   }
516 
517   int name_length = strlen(name);
518   auto cl_register = "";
519   if (name_length > 4 && strcmp(name + (name_length - 4), "ByCl") == 0) {
520     name_length -= 4;
521     cl_register = " %%cl,";
522   }
523 
524   fprintf(out_, "%*s\"%.*s%s ", indent_ + 2, "", name_length, name, cl_register);
525   EmitString(ToGasArgument(args, this)...);
526   fprintf(out_, "\\n\"\n");
527 }
528 
529 }  // namespace berberis
530 
531 #endif  // BERBERIS_INTRINSICS_COMMON_TO_X86_TEXT_ASSEMBLER_COMMON_H_
532