1 // Copyright 2015, ARM Limited
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #ifndef VIXL_A64_ASSEMBLER_A64_H_
28 #define VIXL_A64_ASSEMBLER_A64_H_
29 
30 
31 #include "vixl/globals.h"
32 #include "vixl/invalset.h"
33 #include "vixl/utils.h"
34 #include "vixl/code-buffer.h"
35 #include "vixl/a64/instructions-a64.h"
36 
37 namespace vixl {
38 
39 typedef uint64_t RegList;
40 static const int kRegListSizeInBits = sizeof(RegList) * 8;
41 
42 
43 // Registers.
44 
45 // Some CPURegister methods can return Register or VRegister types, so we need
46 // to declare them in advance.
47 class Register;
48 class VRegister;
49 
// Generic description of a CPU register: an encoding (code_), a width in
// bits (size_) and a bank type (general-purpose or vector). Register and
// VRegister derive from this and add type-specific validity checks.
class CPURegister {
 public:
  enum RegisterType {
    // The kInvalid value is used to detect uninitialized static instances,
    // which are always zero-initialized before any constructors are called.
    kInvalid = 0,
    kRegister,
    kVRegister,
    // FP registers are implemented as V registers; the alias is kept for
    // backward compatibility.
    kFPRegister = kVRegister,
    kNoRegister
  };

  // Default-constructed registers are "none": not valid, but distinguishable
  // from zero-initialized (kInvalid) statics.
  CPURegister() : code_(0), size_(0), type_(kNoRegister) {
    VIXL_ASSERT(!IsValid());
    VIXL_ASSERT(IsNone());
  }

  CPURegister(unsigned code, unsigned size, RegisterType type)
      : code_(code), size_(size), type_(type) {
    VIXL_ASSERT(IsValidOrNone());
  }

  // The register's encoding.
  unsigned code() const {
    VIXL_ASSERT(IsValid());
    return code_;
  }

  RegisterType type() const {
    VIXL_ASSERT(IsValidOrNone());
    return type_;
  }

  // Single-bit mask of this register within a RegList. An invalid ("none")
  // register yields 0, so its Bit() can be OR-ed into a list harmlessly.
  RegList Bit() const {
    VIXL_ASSERT(code_ < (sizeof(RegList) * 8));
    return IsValid() ? (static_cast<RegList>(1) << code_) : 0;
  }

  // The register's width, in bits.
  unsigned size() const {
    VIXL_ASSERT(IsValid());
    return size_;
  }

  int SizeInBytes() const {
    VIXL_ASSERT(IsValid());
    VIXL_ASSERT(size() % 8 == 0);
    return size_ / 8;
  }

  int SizeInBits() const {
    VIXL_ASSERT(IsValid());
    return size_;
  }

  bool Is8Bits() const {
    VIXL_ASSERT(IsValid());
    return size_ == 8;
  }

  bool Is16Bits() const {
    VIXL_ASSERT(IsValid());
    return size_ == 16;
  }

  bool Is32Bits() const {
    VIXL_ASSERT(IsValid());
    return size_ == 32;
  }

  bool Is64Bits() const {
    VIXL_ASSERT(IsValid());
    return size_ == 64;
  }

  bool Is128Bits() const {
    VIXL_ASSERT(IsValid());
    return size_ == 128;
  }

  // A register is valid if it is a valid general-purpose register or a valid
  // vector register; anything else must be "none" (checked by assertion).
  bool IsValid() const {
    if (IsValidRegister() || IsValidVRegister()) {
      VIXL_ASSERT(!IsNone());
      return true;
    } else {
      VIXL_ASSERT(IsNone());
      return false;
    }
  }

  // Valid general-purpose registers are W- or X-sized, with either a normal
  // code or the dedicated internal code used for the stack pointer.
  bool IsValidRegister() const {
    return IsRegister() &&
           ((size_ == kWRegSize) || (size_ == kXRegSize)) &&
           ((code_ < kNumberOfRegisters) || (code_ == kSPRegInternalCode));
  }

  bool IsValidVRegister() const {
    return IsVRegister() &&
           ((size_ == kBRegSize) || (size_ == kHRegSize) ||
            (size_ == kSRegSize) || (size_ == kDRegSize) ||
            (size_ == kQRegSize)) &&
           (code_ < kNumberOfVRegisters);
  }

  bool IsValidFPRegister() const {
    return IsFPRegister() && (code_ < kNumberOfVRegisters);
  }

  bool IsNone() const {
    // kNoRegister types should always have size 0 and code 0.
    VIXL_ASSERT((type_ != kNoRegister) || (code_ == 0));
    VIXL_ASSERT((type_ != kNoRegister) || (size_ == 0));

    return type_ == kNoRegister;
  }

  // Two registers alias if they share a code and a bank, regardless of width
  // (for example w3 aliases x3).
  bool Aliases(const CPURegister& other) const {
    VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone());
    return (code_ == other.code_) && (type_ == other.type_);
  }

  // Full equality: same code, same bank and same width.
  bool Is(const CPURegister& other) const {
    VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone());
    return Aliases(other) && (size_ == other.size_);
  }

  bool IsZero() const {
    VIXL_ASSERT(IsValid());
    return IsRegister() && (code_ == kZeroRegCode);
  }

  bool IsSP() const {
    VIXL_ASSERT(IsValid());
    return IsRegister() && (code_ == kSPRegInternalCode);
  }

  bool IsRegister() const {
    return type_ == kRegister;
  }

  bool IsVRegister() const {
    return type_ == kVRegister;
  }

  // FP registers are the S- and D-sized views of the V bank.
  bool IsFPRegister() const {
    return IsS() || IsD();
  }

  bool IsW() const { return IsValidRegister() && Is32Bits(); }
  bool IsX() const { return IsValidRegister() && Is64Bits(); }

  // These assertions ensure that the size and type of the register are as
  // described. They do not consider the number of lanes that make up a vector.
  // So, for example, Is8B() implies IsD(), and Is1D() implies IsD, but IsD()
  // does not imply Is1D() or Is8B().
  // Check the number of lanes, ie. the format of the vector, using methods such
  // as Is8B(), Is1D(), etc. in the VRegister class.
  bool IsV() const { return IsVRegister(); }
  bool IsB() const { return IsV() && Is8Bits(); }
  bool IsH() const { return IsV() && Is16Bits(); }
  bool IsS() const { return IsV() && Is32Bits(); }
  bool IsD() const { return IsV() && Is64Bits(); }
  bool IsQ() const { return IsV() && Is128Bits(); }

  // Reinterpret this register as a differently-sized view with the same code;
  // implementations are provided outside this header.
  const Register& W() const;
  const Register& X() const;
  const VRegister& V() const;
  const VRegister& B() const;
  const VRegister& H() const;
  const VRegister& S() const;
  const VRegister& D() const;
  const VRegister& Q() const;

  bool IsSameSizeAndType(const CPURegister& other) const {
    return (size_ == other.size_) && (type_ == other.type_);
  }

 protected:
  unsigned code_;
  unsigned size_;
  RegisterType type_;

 private:
  bool IsValidOrNone() const {
    return IsValid() || IsNone();
  }
};
235 
236 
// A general-purpose (W or X) register. The static *RegFromCode accessors
// return references into per-width register tables defined elsewhere.
class Register : public CPURegister {
 public:
  Register() : CPURegister() {}
  explicit Register(const CPURegister& other)
      : CPURegister(other.code(), other.size(), other.type()) {
    VIXL_ASSERT(IsValidRegister());
  }
  Register(unsigned code, unsigned size)
      : CPURegister(code, size, kRegister) {}

  // Shadows CPURegister::IsValid: a Register must specifically be a valid
  // general-purpose register (or "none", checked by assertion).
  bool IsValid() const {
    VIXL_ASSERT(IsRegister() || IsNone());
    return IsValidRegister();
  }

  static const Register& WRegFromCode(unsigned code);
  static const Register& XRegFromCode(unsigned code);

 private:
  // Backing tables for WRegFromCode / XRegFromCode; defined elsewhere.
  static const Register wregisters[];
  static const Register xregisters[];
};
259 
260 
// A vector / floating-point register. In addition to code and width it
// tracks a lane count, so v0.8B and v0.1D (same width, same code) can be
// distinguished.
class VRegister : public CPURegister {
 public:
  VRegister() : CPURegister(), lanes_(1) {}
  explicit VRegister(const CPURegister& other)
      : CPURegister(other.code(), other.size(), other.type()), lanes_(1) {
    VIXL_ASSERT(IsValidVRegister());
    VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
  }
  VRegister(unsigned code, unsigned size, unsigned lanes = 1)
      : CPURegister(code, size, kVRegister), lanes_(lanes) {
    VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
  }
  // Scalar formats produce a single lane; vector formats derive the lane
  // count from the format itself.
  VRegister(unsigned code, VectorFormat format)
      : CPURegister(code, RegisterSizeInBitsFromFormat(format), kVRegister),
        lanes_(IsVectorFormat(format) ? LaneCountFromFormat(format) : 1) {
    VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
  }

  // Shadows CPURegister::IsValid: a VRegister must specifically be a valid
  // vector register (or "none", checked by assertion).
  bool IsValid() const {
    VIXL_ASSERT(IsVRegister() || IsNone());
    return IsValidVRegister();
  }

  static const VRegister& BRegFromCode(unsigned code);
  static const VRegister& HRegFromCode(unsigned code);
  static const VRegister& SRegFromCode(unsigned code);
  static const VRegister& DRegFromCode(unsigned code);
  static const VRegister& QRegFromCode(unsigned code);
  static const VRegister& VRegFromCode(unsigned code);

  // Views of this register with an explicit vector format (same code,
  // width and lane count taken from the format name).
  VRegister V8B() const { return VRegister(code_, kDRegSize, 8); }
  VRegister V16B() const { return VRegister(code_, kQRegSize, 16); }
  VRegister V4H() const { return VRegister(code_, kDRegSize, 4); }
  VRegister V8H() const { return VRegister(code_, kQRegSize, 8); }
  VRegister V2S() const { return VRegister(code_, kDRegSize, 2); }
  VRegister V4S() const { return VRegister(code_, kQRegSize, 4); }
  VRegister V2D() const { return VRegister(code_, kQRegSize, 2); }
  VRegister V1D() const { return VRegister(code_, kDRegSize, 1); }

  // Format predicates: width and lane count must both match.
  bool Is8B() const { return (Is64Bits() && (lanes_ == 8)); }
  bool Is16B() const { return (Is128Bits() && (lanes_ == 16)); }
  bool Is4H() const { return (Is64Bits() && (lanes_ == 4)); }
  bool Is8H() const { return (Is128Bits() && (lanes_ == 8)); }
  bool Is2S() const { return (Is64Bits() && (lanes_ == 2)); }
  bool Is4S() const { return (Is128Bits() && (lanes_ == 4)); }
  bool Is1D() const { return (Is64Bits() && (lanes_ == 1)); }
  bool Is2D() const { return (Is128Bits() && (lanes_ == 2)); }

  // For consistency, we assert the number of lanes of these scalar registers,
  // even though there are no vectors of equivalent total size with which they
  // could alias.
  bool Is1B() const {
    VIXL_ASSERT(!(Is8Bits() && IsVector()));
    return Is8Bits();
  }
  bool Is1H() const {
    VIXL_ASSERT(!(Is16Bits() && IsVector()));
    return Is16Bits();
  }
  bool Is1S() const {
    VIXL_ASSERT(!(Is32Bits() && IsVector()));
    return Is32Bits();
  }

  bool IsLaneSizeB() const { return LaneSizeInBits() == kBRegSize; }
  bool IsLaneSizeH() const { return LaneSizeInBits() == kHRegSize; }
  bool IsLaneSizeS() const { return LaneSizeInBits() == kSRegSize; }
  bool IsLaneSizeD() const { return LaneSizeInBits() == kDRegSize; }

  int lanes() const {
    return lanes_;
  }

  // A scalar has exactly one lane; a vector has more.
  bool IsScalar() const {
    return lanes_ == 1;
  }

  bool IsVector() const {
    return lanes_ > 1;
  }

  bool IsSameFormat(const VRegister& other) const {
    return (size_ == other.size_) && (lanes_ == other.lanes_);
  }

  unsigned LaneSizeInBytes() const {
    return SizeInBytes() / lanes_;
  }

  unsigned LaneSizeInBits() const {
    return LaneSizeInBytes() * 8;
  }

 private:
  // Backing tables for the *RegFromCode accessors; defined elsewhere.
  static const VRegister bregisters[];
  static const VRegister hregisters[];
  static const VRegister sregisters[];
  static const VRegister dregisters[];
  static const VRegister qregisters[];
  static const VRegister vregisters[];
  int lanes_;
};
363 
364 
// Backward compatibility for FPRegisters.
typedef VRegister FPRegister;

// No*Reg is used to indicate an unused argument, or an error case. Note that
// these all compare equal (using the Is() method). The Register and VRegister
// variants are provided for convenience.
// NOTE: namespace-scope const objects have internal linkage, so every
// translation unit including this header gets its own (identical) copies.
const Register NoReg;
const VRegister NoVReg;
const FPRegister NoFPReg;  // For backward compatibility.
const CPURegister NoCPUReg;


// Define the w<N> and x<N> views for each general-purpose register code
// produced by REGISTER_CODE_LIST (defined elsewhere).
#define DEFINE_REGISTERS(N)  \
const Register w##N(N, kWRegSize);  \
const Register x##N(N, kXRegSize);
REGISTER_CODE_LIST(DEFINE_REGISTERS)
#undef DEFINE_REGISTERS
// The stack pointer uses a dedicated internal code so that it does not
// collide with the ordinary register encodings above.
const Register wsp(kSPRegInternalCode, kWRegSize);
const Register sp(kSPRegInternalCode, kXRegSize);


// Define the scalar (b/h/s/d/q) and vector (v) views for each V register
// code produced by REGISTER_CODE_LIST.
#define DEFINE_VREGISTERS(N)  \
const VRegister b##N(N, kBRegSize);  \
const VRegister h##N(N, kHRegSize);  \
const VRegister s##N(N, kSRegSize);  \
const VRegister d##N(N, kDRegSize);  \
const VRegister q##N(N, kQRegSize);  \
const VRegister v##N(N, kQRegSize);
REGISTER_CODE_LIST(DEFINE_VREGISTERS)
#undef DEFINE_VREGISTERS


// Register aliases.
const Register ip0 = x16;  // Intra-procedure-call scratch registers.
const Register ip1 = x17;
const Register lr = x30;   // Link register.
const Register xzr = x31;  // Zero register (X and W views).
const Register wzr = w31;
403 
404 
// AreAliased returns true if any of the named registers overlap. Arguments
// set to NoReg are ignored. The system stack pointer may be specified.
bool AreAliased(const CPURegister& reg1,
                const CPURegister& reg2,
                const CPURegister& reg3 = NoReg,
                const CPURegister& reg4 = NoReg,
                const CPURegister& reg5 = NoReg,
                const CPURegister& reg6 = NoReg,
                const CPURegister& reg7 = NoReg,
                const CPURegister& reg8 = NoReg);


// AreSameSizeAndType returns true if all of the specified registers have the
// same size, and are of the same type. The system stack pointer may be
// specified. Arguments set to NoReg are ignored, as are any subsequent
// arguments. At least one argument (reg1) must be valid (not NoCPUReg).
bool AreSameSizeAndType(const CPURegister& reg1,
                        const CPURegister& reg2,
                        const CPURegister& reg3 = NoCPUReg,
                        const CPURegister& reg4 = NoCPUReg,
                        const CPURegister& reg5 = NoCPUReg,
                        const CPURegister& reg6 = NoCPUReg,
                        const CPURegister& reg7 = NoCPUReg,
                        const CPURegister& reg8 = NoCPUReg);


// AreSameFormat returns true if all of the specified VRegisters have the same
// vector format. Arguments set to NoReg are ignored, as are any subsequent
// arguments. At least one argument (reg1) must be valid (not NoVReg).
bool AreSameFormat(const VRegister& reg1,
                   const VRegister& reg2,
                   const VRegister& reg3 = NoVReg,
                   const VRegister& reg4 = NoVReg);


// AreConsecutive returns true if all of the specified VRegisters are
// consecutive in the register file. Arguments set to NoReg are ignored, as are
// any subsequent arguments. At least one argument (reg1) must be valid
// (not NoVReg).
bool AreConsecutive(const VRegister& reg1,
                    const VRegister& reg2,
                    const VRegister& reg3 = NoVReg,
                    const VRegister& reg4 = NoVReg);
448 
449 
450 // Lists of registers.
451 class CPURegList {
452  public:
453   explicit CPURegList(CPURegister reg1,
454                       CPURegister reg2 = NoCPUReg,
455                       CPURegister reg3 = NoCPUReg,
456                       CPURegister reg4 = NoCPUReg)
457       : list_(reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit()),
458         size_(reg1.size()), type_(reg1.type()) {
459     VIXL_ASSERT(AreSameSizeAndType(reg1, reg2, reg3, reg4));
460     VIXL_ASSERT(IsValid());
461   }
462 
CPURegList(CPURegister::RegisterType type,unsigned size,RegList list)463   CPURegList(CPURegister::RegisterType type, unsigned size, RegList list)
464       : list_(list), size_(size), type_(type) {
465     VIXL_ASSERT(IsValid());
466   }
467 
CPURegList(CPURegister::RegisterType type,unsigned size,unsigned first_reg,unsigned last_reg)468   CPURegList(CPURegister::RegisterType type, unsigned size,
469              unsigned first_reg, unsigned last_reg)
470       : size_(size), type_(type) {
471     VIXL_ASSERT(((type == CPURegister::kRegister) &&
472                  (last_reg < kNumberOfRegisters)) ||
473                 ((type == CPURegister::kVRegister) &&
474                  (last_reg < kNumberOfVRegisters)));
475     VIXL_ASSERT(last_reg >= first_reg);
476     list_ = (UINT64_C(1) << (last_reg + 1)) - 1;
477     list_ &= ~((UINT64_C(1) << first_reg) - 1);
478     VIXL_ASSERT(IsValid());
479   }
480 
type()481   CPURegister::RegisterType type() const {
482     VIXL_ASSERT(IsValid());
483     return type_;
484   }
485 
486   // Combine another CPURegList into this one. Registers that already exist in
487   // this list are left unchanged. The type and size of the registers in the
488   // 'other' list must match those in this list.
Combine(const CPURegList & other)489   void Combine(const CPURegList& other) {
490     VIXL_ASSERT(IsValid());
491     VIXL_ASSERT(other.type() == type_);
492     VIXL_ASSERT(other.RegisterSizeInBits() == size_);
493     list_ |= other.list();
494   }
495 
496   // Remove every register in the other CPURegList from this one. Registers that
497   // do not exist in this list are ignored. The type and size of the registers
498   // in the 'other' list must match those in this list.
Remove(const CPURegList & other)499   void Remove(const CPURegList& other) {
500     VIXL_ASSERT(IsValid());
501     VIXL_ASSERT(other.type() == type_);
502     VIXL_ASSERT(other.RegisterSizeInBits() == size_);
503     list_ &= ~other.list();
504   }
505 
506   // Variants of Combine and Remove which take a single register.
Combine(const CPURegister & other)507   void Combine(const CPURegister& other) {
508     VIXL_ASSERT(other.type() == type_);
509     VIXL_ASSERT(other.size() == size_);
510     Combine(other.code());
511   }
512 
Remove(const CPURegister & other)513   void Remove(const CPURegister& other) {
514     VIXL_ASSERT(other.type() == type_);
515     VIXL_ASSERT(other.size() == size_);
516     Remove(other.code());
517   }
518 
519   // Variants of Combine and Remove which take a single register by its code;
520   // the type and size of the register is inferred from this list.
Combine(int code)521   void Combine(int code) {
522     VIXL_ASSERT(IsValid());
523     VIXL_ASSERT(CPURegister(code, size_, type_).IsValid());
524     list_ |= (UINT64_C(1) << code);
525   }
526 
Remove(int code)527   void Remove(int code) {
528     VIXL_ASSERT(IsValid());
529     VIXL_ASSERT(CPURegister(code, size_, type_).IsValid());
530     list_ &= ~(UINT64_C(1) << code);
531   }
532 
Union(const CPURegList & list_1,const CPURegList & list_2)533   static CPURegList Union(const CPURegList& list_1, const CPURegList& list_2) {
534     VIXL_ASSERT(list_1.type_ == list_2.type_);
535     VIXL_ASSERT(list_1.size_ == list_2.size_);
536     return CPURegList(list_1.type_, list_1.size_, list_1.list_ | list_2.list_);
537   }
538   static CPURegList Union(const CPURegList& list_1,
539                           const CPURegList& list_2,
540                           const CPURegList& list_3);
541   static CPURegList Union(const CPURegList& list_1,
542                           const CPURegList& list_2,
543                           const CPURegList& list_3,
544                           const CPURegList& list_4);
545 
Intersection(const CPURegList & list_1,const CPURegList & list_2)546   static CPURegList Intersection(const CPURegList& list_1,
547                                  const CPURegList& list_2) {
548     VIXL_ASSERT(list_1.type_ == list_2.type_);
549     VIXL_ASSERT(list_1.size_ == list_2.size_);
550     return CPURegList(list_1.type_, list_1.size_, list_1.list_ & list_2.list_);
551   }
552   static CPURegList Intersection(const CPURegList& list_1,
553                                  const CPURegList& list_2,
554                                  const CPURegList& list_3);
555   static CPURegList Intersection(const CPURegList& list_1,
556                                  const CPURegList& list_2,
557                                  const CPURegList& list_3,
558                                  const CPURegList& list_4);
559 
Overlaps(const CPURegList & other)560   bool Overlaps(const CPURegList& other) const {
561     return (type_ == other.type_) && ((list_ & other.list_) != 0);
562   }
563 
list()564   RegList list() const {
565     VIXL_ASSERT(IsValid());
566     return list_;
567   }
568 
set_list(RegList new_list)569   void set_list(RegList new_list) {
570     VIXL_ASSERT(IsValid());
571     list_ = new_list;
572   }
573 
574   // Remove all callee-saved registers from the list. This can be useful when
575   // preparing registers for an AAPCS64 function call, for example.
576   void RemoveCalleeSaved();
577 
578   CPURegister PopLowestIndex();
579   CPURegister PopHighestIndex();
580 
581   // AAPCS64 callee-saved registers.
582   static CPURegList GetCalleeSaved(unsigned size = kXRegSize);
583   static CPURegList GetCalleeSavedV(unsigned size = kDRegSize);
584 
585   // AAPCS64 caller-saved registers. Note that this includes lr.
586   // TODO(all): Determine how we handle d8-d15 being callee-saved, but the top
587   // 64-bits being caller-saved.
588   static CPURegList GetCallerSaved(unsigned size = kXRegSize);
589   static CPURegList GetCallerSavedV(unsigned size = kDRegSize);
590 
IsEmpty()591   bool IsEmpty() const {
592     VIXL_ASSERT(IsValid());
593     return list_ == 0;
594   }
595 
IncludesAliasOf(const CPURegister & other)596   bool IncludesAliasOf(const CPURegister& other) const {
597     VIXL_ASSERT(IsValid());
598     return (type_ == other.type()) && ((other.Bit() & list_) != 0);
599   }
600 
IncludesAliasOf(int code)601   bool IncludesAliasOf(int code) const {
602     VIXL_ASSERT(IsValid());
603     return ((code & list_) != 0);
604   }
605 
Count()606   int Count() const {
607     VIXL_ASSERT(IsValid());
608     return CountSetBits(list_);
609   }
610 
RegisterSizeInBits()611   unsigned RegisterSizeInBits() const {
612     VIXL_ASSERT(IsValid());
613     return size_;
614   }
615 
RegisterSizeInBytes()616   unsigned RegisterSizeInBytes() const {
617     int size_in_bits = RegisterSizeInBits();
618     VIXL_ASSERT((size_in_bits % 8) == 0);
619     return size_in_bits / 8;
620   }
621 
TotalSizeInBytes()622   unsigned TotalSizeInBytes() const {
623     VIXL_ASSERT(IsValid());
624     return RegisterSizeInBytes() * Count();
625   }
626 
627  private:
628   RegList list_;
629   unsigned size_;
630   CPURegister::RegisterType type_;
631 
632   bool IsValid() const;
633 };
634 
635 
636 // AAPCS64 callee-saved registers.
637 extern const CPURegList kCalleeSaved;
638 extern const CPURegList kCalleeSavedV;
639 
640 
641 // AAPCS64 caller-saved registers. Note that this includes lr.
642 extern const CPURegList kCallerSaved;
643 extern const CPURegList kCallerSavedV;
644 
645 
646 // Operand.
647 class Operand {
648  public:
649   // #<immediate>
650   // where <immediate> is int64_t.
651   // This is allowed to be an implicit constructor because Operand is
652   // a wrapper class that doesn't normally perform any type conversion.
653   Operand(int64_t immediate = 0);           // NOLINT(runtime/explicit)
654 
655   // rm, {<shift> #<shift_amount>}
656   // where <shift> is one of {LSL, LSR, ASR, ROR}.
657   //       <shift_amount> is uint6_t.
658   // This is allowed to be an implicit constructor because Operand is
659   // a wrapper class that doesn't normally perform any type conversion.
660   Operand(Register reg,
661           Shift shift = LSL,
662           unsigned shift_amount = 0);   // NOLINT(runtime/explicit)
663 
664   // rm, {<extend> {#<shift_amount>}}
665   // where <extend> is one of {UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX}.
666   //       <shift_amount> is uint2_t.
667   explicit Operand(Register reg, Extend extend, unsigned shift_amount = 0);
668 
669   bool IsImmediate() const;
670   bool IsShiftedRegister() const;
671   bool IsExtendedRegister() const;
672   bool IsZero() const;
673 
674   // This returns an LSL shift (<= 4) operand as an equivalent extend operand,
675   // which helps in the encoding of instructions that use the stack pointer.
676   Operand ToExtendedRegister() const;
677 
immediate()678   int64_t immediate() const {
679     VIXL_ASSERT(IsImmediate());
680     return immediate_;
681   }
682 
reg()683   Register reg() const {
684     VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister());
685     return reg_;
686   }
687 
shift()688   Shift shift() const {
689     VIXL_ASSERT(IsShiftedRegister());
690     return shift_;
691   }
692 
extend()693   Extend extend() const {
694     VIXL_ASSERT(IsExtendedRegister());
695     return extend_;
696   }
697 
shift_amount()698   unsigned shift_amount() const {
699     VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister());
700     return shift_amount_;
701   }
702 
703  private:
704   int64_t immediate_;
705   Register reg_;
706   Shift shift_;
707   Extend extend_;
708   unsigned shift_amount_;
709 };
710 
711 
712 // MemOperand represents the addressing mode of a load or store instruction.
713 class MemOperand {
714  public:
715   explicit MemOperand(Register base,
716                       int64_t offset = 0,
717                       AddrMode addrmode = Offset);
718   MemOperand(Register base,
719              Register regoffset,
720              Shift shift = LSL,
721              unsigned shift_amount = 0);
722   MemOperand(Register base,
723              Register regoffset,
724              Extend extend,
725              unsigned shift_amount = 0);
726   MemOperand(Register base,
727              const Operand& offset,
728              AddrMode addrmode = Offset);
729 
base()730   const Register& base() const { return base_; }
regoffset()731   const Register& regoffset() const { return regoffset_; }
offset()732   int64_t offset() const { return offset_; }
addrmode()733   AddrMode addrmode() const { return addrmode_; }
shift()734   Shift shift() const { return shift_; }
extend()735   Extend extend() const { return extend_; }
shift_amount()736   unsigned shift_amount() const { return shift_amount_; }
737   bool IsImmediateOffset() const;
738   bool IsRegisterOffset() const;
739   bool IsPreIndex() const;
740   bool IsPostIndex() const;
741 
742   void AddOffset(int64_t offset);
743 
744  private:
745   Register base_;
746   Register regoffset_;
747   int64_t offset_;
748   AddrMode addrmode_;
749   Shift shift_;
750   Extend extend_;
751   unsigned shift_amount_;
752 };
753 
754 
755 class LabelTestHelper;  // Forward declaration.
756 
757 
// A position in the generated code stream that instructions can branch to
// before (link) or after (bind) its location is known. Only the Assembler,
// MacroAssembler and VeneerPool may bind labels or manage their links.
class Label {
 public:
  Label() : location_(kLocationUnbound) {}
  ~Label() {
    // If the label has been linked to, it needs to be bound to a target.
    VIXL_ASSERT(!IsLinked() || IsBound());
  }

  // Bound labels have a non-negative buffer offset as their location.
  bool IsBound() const { return location_ >= 0; }
  // A label is linked while at least one instruction still refers to it.
  bool IsLinked() const { return !links_.empty(); }

  // Offset of the bound location in the code buffer (kLocationUnbound if
  // the label has not been bound yet).
  ptrdiff_t location() const { return location_; }

  // InvalSet parameters for the set of link offsets (see links_ below).
  static const int kNPreallocatedLinks = 4;
  static const ptrdiff_t kInvalidLinkKey = PTRDIFF_MAX;
  static const size_t kReclaimFrom = 512;
  static const size_t kReclaimFactor = 2;

  typedef InvalSet<ptrdiff_t,
                   kNPreallocatedLinks,
                   ptrdiff_t,
                   kInvalidLinkKey,
                   kReclaimFrom,
                   kReclaimFactor> LinksSetBase;
  typedef InvalSetIterator<LinksSetBase> LabelLinksIteratorBase;

 private:
  class LinksSet : public LinksSetBase {
   public:
    LinksSet() : LinksSetBase() {}
  };

  // Allows iterating over the links of a label. The behaviour is undefined if
  // the list of links is modified in any way while iterating.
  class LabelLinksIterator : public LabelLinksIteratorBase {
   public:
    explicit LabelLinksIterator(Label* label)
        : LabelLinksIteratorBase(&label->links_) {}
  };

  void Bind(ptrdiff_t location) {
    // Labels can only be bound once.
    VIXL_ASSERT(!IsBound());
    location_ = location;
  }

  void AddLink(ptrdiff_t instruction) {
    // If a label is bound, the assembler already has the information it needs
    // to write the instruction, so there is no need to add it to links_.
    VIXL_ASSERT(!IsBound());
    links_.insert(instruction);
  }

  void DeleteLink(ptrdiff_t instruction) {
    links_.erase(instruction);
  }

  void ClearAllLinks() {
    links_.clear();
  }

  // TODO: The comment below considers average case complexity for our
  // usual use-cases. The elements of interest are:
  // - Branches to a label are emitted in order: branch instructions to a label
  // are generated at an offset in the code generation buffer greater than any
  // other branch to that same label already generated. As an example, this can
  // be broken when an instruction is patched to become a branch. Note that the
  // code will still work, but the complexity considerations below may locally
  // not apply any more.
  // - Veneers are generated in order: for multiple branches of the same type
  // branching to the same unbound label going out of range, veneers are
  // generated in growing order of the branch instruction offset from the start
  // of the buffer.
  //
  // When creating a veneer for a branch going out of range, the link for this
  // branch needs to be removed from this `links_`. Since all branches are
  // tracked in one underlying InvalSet, the complexity for this deletion is the
  // same as for finding the element, ie. O(n), where n is the number of links
  // in the set.
  // This could be reduced to O(1) by using the same trick as used when tracking
  // branch information for veneers: split the container to use one set per type
  // of branch. With that setup, when a veneer is created and the link needs to
  // be deleted, if the two points above hold, it must be the minimum element of
  // the set for its type of branch, and that minimum element will be accessible
  // in O(1).

  // The offsets of the instructions that have linked to this label.
  LinksSet links_;
  // The label location.
  ptrdiff_t location_;

  static const ptrdiff_t kLocationUnbound = -1;

  // It is not safe to copy labels, so disable the copy constructor and operator
  // by declaring them private (without an implementation).
  Label(const Label&);
  void operator=(const Label&);

  // The Assembler class is responsible for binding and linking labels, since
  // the stored offsets need to be consistent with the Assembler's buffer.
  friend class Assembler;
  // The MacroAssembler and VeneerPool handle resolution of branches to distant
  // targets.
  friend class MacroAssembler;
  friend class VeneerPool;
};
864 
865 
// Required InvalSet template specialisations.
#define INVAL_SET_TEMPLATE_PARAMETERS \
    ptrdiff_t,                        \
    Label::kNPreallocatedLinks,       \
    ptrdiff_t,                        \
    Label::kInvalidLinkKey,           \
    Label::kReclaimFrom,              \
    Label::kReclaimFactor
// A link element (a buffer offset) is its own key.
template<>
inline ptrdiff_t InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::Key(
    const ptrdiff_t& element) {
  return element;
}
// Setting the key of an element simply overwrites the element itself.
template<>
inline void InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::SetKey(
              ptrdiff_t* element, ptrdiff_t key) {
  *element = key;
}
#undef INVAL_SET_TEMPLATE_PARAMETERS
885 
886 
887 // A literal is a 32-bit or 64-bit piece of data stored in the instruction
888 // stream and loaded through a pc relative load. The same literal can be
889 // referred to by multiple instructions but a literal can only reside at one
890 // place in memory. A literal can be used by a load before or after being
891 // placed in memory.
892 //
893 // Internally an offset of 0 is associated with a literal which has been
894 // neither used nor placed. Then two possibilities arise:
895 //  1) the label is placed, the offset (stored as offset + 1) is used to
896 //     resolve any subsequent load using the label.
897 //  2) the label is not placed and offset is the offset of the last load using
898 //     the literal (stored as -offset -1). If multiple loads refer to this
899 //     literal then the last load holds the offset of the preceding load and
900 //     all loads form a chain. Once the offset is placed all the loads in the
901 //     chain are resolved and future loads fall back to possibility 1.
class RawLiteral {
 public:
  // A default-constructed literal is neither used nor placed (offset_ == 0).
  RawLiteral() : size_(0), offset_(0), low64_(0), high64_(0) {}

  // Size of the literal in bytes: W (32-bit), X (64-bit) or Q (128-bit).
  size_t size() {
    VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
    VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
    VIXL_ASSERT((size_ == kXRegSizeInBytes) ||
                (size_ == kWRegSizeInBytes) ||
                (size_ == kQRegSizeInBytes));
    return size_;
  }
  // Low 64 bits of a 128-bit (Q-sized) literal.
  uint64_t raw_value128_low64() {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return low64_;
  }
  // High 64 bits of a 128-bit (Q-sized) literal.
  uint64_t raw_value128_high64() {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return high64_;
  }
  // Value of a 64-bit (X-sized) literal.
  uint64_t raw_value64() {
    VIXL_ASSERT(size_ == kXRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    return low64_;
  }
  // Value of a 32-bit (W-sized) literal.
  uint32_t raw_value32() {
    VIXL_ASSERT(size_ == kWRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    VIXL_ASSERT(is_uint32(low64_) || is_int32(low64_));
    return static_cast<uint32_t>(low64_);
  }
  // See the class comment: offset_ encodes "used" as a negative value and
  // "placed" as a positive one; zero means neither.
  bool IsUsed() { return offset_ < 0; }
  bool IsPlaced() { return offset_ > 0; }

 protected:
  // Offset of the literal in the buffer; only valid once it has been placed.
  ptrdiff_t offset() {
    VIXL_ASSERT(IsPlaced());
    return offset_ - 1;  // Stored as (offset + 1).
  }
  void set_offset(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = offset + 1;
  }
  // Offset of the most recent load using this literal; only valid while the
  // literal is used but not yet placed (the loads form a chain, see above).
  ptrdiff_t last_use() {
    VIXL_ASSERT(IsUsed());
    return -offset_ - 1;  // Stored as (-offset - 1).
  }
  void set_last_use(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = -offset - 1;
  }

  size_t size_;
  // Encodes the placed/used state; see the class comment for the scheme.
  ptrdiff_t offset_;
  uint64_t low64_;
  uint64_t high64_;

  friend class Assembler;
};
965 
966 
template <typename T>
class Literal : public RawLiteral {
 public:
  // 32-bit or 64-bit literal. memcpy is used (rather than a cast) so the bit
  // pattern of non-integral types such as float and double is preserved.
  explicit Literal(T value) {
    VIXL_STATIC_ASSERT(sizeof(T) <= kXRegSizeInBytes);
    size_ = sizeof(value);
    memcpy(&low64_, &value, sizeof(value));
  }

  // 128-bit (Q-sized) literal, given as two 64-bit halves.
  Literal(T high64, T low64) {
    VIXL_STATIC_ASSERT(sizeof(T) == (kQRegSizeInBytes / 2));
    size_ = kQRegSizeInBytes;
    memcpy(&low64_, &low64, sizeof(low64));
    memcpy(&high64_, &high64, sizeof(high64));
  }
};
983 
984 
985 // Control whether or not position-independent code should be emitted.
enum PositionIndependentCodeOption {
  // All code generated will be position-independent; all branches and
  // references to labels generated with the Label class will use PC-relative
  // addressing. (This is the default for the Assembler constructors.)
  PositionIndependentCode,

  // Allow VIXL to generate code that refers to absolute addresses. With this
  // option, it will not be possible to copy the code buffer and run it from a
  // different address; code must be generated in its final location.
  PositionDependentCode,

  // Allow VIXL to assume that the bottom 12 bits of the address will be
  // constant, but that the top 48 bits may change. This allows `adrp` to
  // function in systems which copy code between pages, but otherwise maintain
  // 4KB page alignment.
  PageOffsetDependentCode
};
1003 
1004 
1005 // Control how scaled- and unscaled-offset loads and stores are generated.
enum LoadStoreScalingOption {
  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  // (Default for the scaled-offset mnemonics such as `ldr` and `str`.)
  PreferScaledOffset,

  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  // (Default for the unscaled-offset mnemonics such as `ldur` and `stur`.)
  PreferUnscaledOffset,

  // Require scaled-immediate-offset instructions.
  RequireScaledOffset,

  // Require unscaled-immediate-offset instructions.
  RequireUnscaledOffset
};
1021 
1022 
1023 // Assembler.
1024 class Assembler {
1025  public:
1026   Assembler(size_t capacity,
1027             PositionIndependentCodeOption pic = PositionIndependentCode);
1028   Assembler(byte* buffer, size_t capacity,
1029             PositionIndependentCodeOption pic = PositionIndependentCode);
1030 
1031   // The destructor asserts that one of the following is true:
1032   //  * The Assembler object has not been used.
1033   //  * Nothing has been emitted since the last Reset() call.
1034   //  * Nothing has been emitted since the last FinalizeCode() call.
1035   ~Assembler();
1036 
1037   // System functions.
1038 
1039   // Start generating code from the beginning of the buffer, discarding any code
1040   // and data that has already been emitted into the buffer.
1041   void Reset();
1042 
1043   // Finalize a code buffer of generated instructions. This function must be
1044   // called before executing or copying code from the buffer.
1045   void FinalizeCode();
1046 
1047   // Label.
1048   // Bind a label to the current PC.
1049   void bind(Label* label);
1050 
1051   // Bind a label to a specified offset from the start of the buffer.
1052   void BindToOffset(Label* label, ptrdiff_t offset);
1053 
1054   // Place a literal at the current PC.
1055   void place(RawLiteral* literal);
1056 
  // Offset, from the start of the buffer, of the cursor (where the next
  // instruction will be emitted).
  ptrdiff_t CursorOffset() const {
    return buffer_->CursorOffset();
  }
1060 
  // Offset one-past-the-end of the buffer, i.e. its total capacity.
  ptrdiff_t BufferEndOffset() const {
    return static_cast<ptrdiff_t>(buffer_->capacity());
  }
1064 
1065   // Return the address of an offset in the buffer.
  template <typename T>
  T GetOffsetAddress(ptrdiff_t offset) {
    // T must be wide enough to hold a pointer without truncation.
    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
    return buffer_->GetOffsetAddress<T>(offset);
  }
1071 
1072   // Return the address of a bound label.
  template <typename T>
  T GetLabelAddress(const Label * label) {
    // The label must have been bound before its address can be queried.
    VIXL_ASSERT(label->IsBound());
    // T must be wide enough to hold a pointer without truncation.
    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
    return GetOffsetAddress<T>(label->location());
  }
1079 
1080   // Return the address of the cursor.
  template <typename T>
  T GetCursorAddress() {
    // T must be wide enough to hold a pointer without truncation.
    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
    return GetOffsetAddress<T>(CursorOffset());
  }
1086 
1087   // Return the address of the start of the buffer.
  template <typename T>
  T GetStartAddress() {
    // T must be wide enough to hold a pointer without truncation.
    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
    return GetOffsetAddress<T>(0);
  }
1093 
  // Return a pointer to the instruction at the given buffer offset.
  Instruction* InstructionAt(ptrdiff_t instruction_offset) {
    return GetOffsetAddress<Instruction*>(instruction_offset);
  }
1097 
InstructionOffset(Instruction * instruction)1098   ptrdiff_t InstructionOffset(Instruction* instruction) {
1099     VIXL_STATIC_ASSERT(sizeof(*instruction) == 1);
1100     ptrdiff_t offset = instruction - GetStartAddress<Instruction*>();
1101     VIXL_ASSERT((0 <= offset) &&
1102                 (offset < static_cast<ptrdiff_t>(BufferCapacity())));
1103     return offset;
1104   }
1105 
1106   // Instruction set functions.
1107 
1108   // Branch / Jump instructions.
1109   // Branch to register.
1110   void br(const Register& xn);
1111 
1112   // Branch with link to register.
1113   void blr(const Register& xn);
1114 
1115   // Branch to register with return hint.
1116   void ret(const Register& xn = lr);
1117 
1118   // Unconditional branch to label.
1119   void b(Label* label);
1120 
1121   // Conditional branch to label.
1122   void b(Label* label, Condition cond);
1123 
1124   // Unconditional branch to PC offset.
1125   void b(int imm26);
1126 
1127   // Conditional branch to PC offset.
1128   void b(int imm19, Condition cond);
1129 
1130   // Branch with link to label.
1131   void bl(Label* label);
1132 
1133   // Branch with link to PC offset.
1134   void bl(int imm26);
1135 
1136   // Compare and branch to label if zero.
1137   void cbz(const Register& rt, Label* label);
1138 
1139   // Compare and branch to PC offset if zero.
1140   void cbz(const Register& rt, int imm19);
1141 
1142   // Compare and branch to label if not zero.
1143   void cbnz(const Register& rt, Label* label);
1144 
1145   // Compare and branch to PC offset if not zero.
1146   void cbnz(const Register& rt, int imm19);
1147 
1148   // Table lookup from one register.
1149   void tbl(const VRegister& vd,
1150            const VRegister& vn,
1151            const VRegister& vm);
1152 
1153   // Table lookup from two registers.
1154   void tbl(const VRegister& vd,
1155            const VRegister& vn,
1156            const VRegister& vn2,
1157            const VRegister& vm);
1158 
1159   // Table lookup from three registers.
1160   void tbl(const VRegister& vd,
1161            const VRegister& vn,
1162            const VRegister& vn2,
1163            const VRegister& vn3,
1164            const VRegister& vm);
1165 
1166   // Table lookup from four registers.
1167   void tbl(const VRegister& vd,
1168            const VRegister& vn,
1169            const VRegister& vn2,
1170            const VRegister& vn3,
1171            const VRegister& vn4,
1172            const VRegister& vm);
1173 
1174   // Table lookup extension from one register.
1175   void tbx(const VRegister& vd,
1176            const VRegister& vn,
1177            const VRegister& vm);
1178 
1179   // Table lookup extension from two registers.
1180   void tbx(const VRegister& vd,
1181            const VRegister& vn,
1182            const VRegister& vn2,
1183            const VRegister& vm);
1184 
1185   // Table lookup extension from three registers.
1186   void tbx(const VRegister& vd,
1187            const VRegister& vn,
1188            const VRegister& vn2,
1189            const VRegister& vn3,
1190            const VRegister& vm);
1191 
1192   // Table lookup extension from four registers.
1193   void tbx(const VRegister& vd,
1194            const VRegister& vn,
1195            const VRegister& vn2,
1196            const VRegister& vn3,
1197            const VRegister& vn4,
1198            const VRegister& vm);
1199 
1200   // Test bit and branch to label if zero.
1201   void tbz(const Register& rt, unsigned bit_pos, Label* label);
1202 
1203   // Test bit and branch to PC offset if zero.
1204   void tbz(const Register& rt, unsigned bit_pos, int imm14);
1205 
1206   // Test bit and branch to label if not zero.
1207   void tbnz(const Register& rt, unsigned bit_pos, Label* label);
1208 
1209   // Test bit and branch to PC offset if not zero.
1210   void tbnz(const Register& rt, unsigned bit_pos, int imm14);
1211 
1212   // Address calculation instructions.
1213   // Calculate a PC-relative address. Unlike for branches the offset in adr is
1214   // unscaled (i.e. the result can be unaligned).
1215 
1216   // Calculate the address of a label.
1217   void adr(const Register& rd, Label* label);
1218 
1219   // Calculate the address of a PC offset.
1220   void adr(const Register& rd, int imm21);
1221 
1222   // Calculate the page address of a label.
1223   void adrp(const Register& rd, Label* label);
1224 
1225   // Calculate the page address of a PC offset.
1226   void adrp(const Register& rd, int imm21);
1227 
1228   // Data Processing instructions.
1229   // Add.
1230   void add(const Register& rd,
1231            const Register& rn,
1232            const Operand& operand);
1233 
1234   // Add and update status flags.
1235   void adds(const Register& rd,
1236             const Register& rn,
1237             const Operand& operand);
1238 
1239   // Compare negative.
1240   void cmn(const Register& rn, const Operand& operand);
1241 
1242   // Subtract.
1243   void sub(const Register& rd,
1244            const Register& rn,
1245            const Operand& operand);
1246 
1247   // Subtract and update status flags.
1248   void subs(const Register& rd,
1249             const Register& rn,
1250             const Operand& operand);
1251 
1252   // Compare.
1253   void cmp(const Register& rn, const Operand& operand);
1254 
1255   // Negate.
1256   void neg(const Register& rd,
1257            const Operand& operand);
1258 
1259   // Negate and update status flags.
1260   void negs(const Register& rd,
1261             const Operand& operand);
1262 
1263   // Add with carry bit.
1264   void adc(const Register& rd,
1265            const Register& rn,
1266            const Operand& operand);
1267 
1268   // Add with carry bit and update status flags.
1269   void adcs(const Register& rd,
1270             const Register& rn,
1271             const Operand& operand);
1272 
1273   // Subtract with carry bit.
1274   void sbc(const Register& rd,
1275            const Register& rn,
1276            const Operand& operand);
1277 
1278   // Subtract with carry bit and update status flags.
1279   void sbcs(const Register& rd,
1280             const Register& rn,
1281             const Operand& operand);
1282 
1283   // Negate with carry bit.
1284   void ngc(const Register& rd,
1285            const Operand& operand);
1286 
1287   // Negate with carry bit and update status flags.
1288   void ngcs(const Register& rd,
1289             const Operand& operand);
1290 
1291   // Logical instructions.
1292   // Bitwise and (A & B).
1293   void and_(const Register& rd,
1294             const Register& rn,
1295             const Operand& operand);
1296 
1297   // Bitwise and (A & B) and update status flags.
1298   void ands(const Register& rd,
1299             const Register& rn,
1300             const Operand& operand);
1301 
1302   // Bit test and set flags.
1303   void tst(const Register& rn, const Operand& operand);
1304 
1305   // Bit clear (A & ~B).
1306   void bic(const Register& rd,
1307            const Register& rn,
1308            const Operand& operand);
1309 
1310   // Bit clear (A & ~B) and update status flags.
1311   void bics(const Register& rd,
1312             const Register& rn,
1313             const Operand& operand);
1314 
1315   // Bitwise or (A | B).
1316   void orr(const Register& rd, const Register& rn, const Operand& operand);
1317 
1318   // Bitwise nor (A | ~B).
1319   void orn(const Register& rd, const Register& rn, const Operand& operand);
1320 
1321   // Bitwise eor/xor (A ^ B).
1322   void eor(const Register& rd, const Register& rn, const Operand& operand);
1323 
1324   // Bitwise enor/xnor (A ^ ~B).
1325   void eon(const Register& rd, const Register& rn, const Operand& operand);
1326 
1327   // Logical shift left by variable.
1328   void lslv(const Register& rd, const Register& rn, const Register& rm);
1329 
1330   // Logical shift right by variable.
1331   void lsrv(const Register& rd, const Register& rn, const Register& rm);
1332 
1333   // Arithmetic shift right by variable.
1334   void asrv(const Register& rd, const Register& rn, const Register& rm);
1335 
1336   // Rotate right by variable.
1337   void rorv(const Register& rd, const Register& rn, const Register& rm);
1338 
1339   // Bitfield instructions.
1340   // Bitfield move.
1341   void bfm(const Register& rd,
1342            const Register& rn,
1343            unsigned immr,
1344            unsigned imms);
1345 
1346   // Signed bitfield move.
1347   void sbfm(const Register& rd,
1348             const Register& rn,
1349             unsigned immr,
1350             unsigned imms);
1351 
1352   // Unsigned bitfield move.
1353   void ubfm(const Register& rd,
1354             const Register& rn,
1355             unsigned immr,
1356             unsigned imms);
1357 
1358   // Bfm aliases.
1359   // Bitfield insert.
bfi(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1360   void bfi(const Register& rd,
1361            const Register& rn,
1362            unsigned lsb,
1363            unsigned width) {
1364     VIXL_ASSERT(width >= 1);
1365     VIXL_ASSERT(lsb + width <= rn.size());
1366     bfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
1367   }
1368 
1369   // Bitfield extract and insert low.
bfxil(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1370   void bfxil(const Register& rd,
1371              const Register& rn,
1372              unsigned lsb,
1373              unsigned width) {
1374     VIXL_ASSERT(width >= 1);
1375     VIXL_ASSERT(lsb + width <= rn.size());
1376     bfm(rd, rn, lsb, lsb + width - 1);
1377   }
1378 
1379   // Sbfm aliases.
1380   // Arithmetic shift right.
asr(const Register & rd,const Register & rn,unsigned shift)1381   void asr(const Register& rd, const Register& rn, unsigned shift) {
1382     VIXL_ASSERT(shift < rd.size());
1383     sbfm(rd, rn, shift, rd.size() - 1);
1384   }
1385 
1386   // Signed bitfield insert with zero at right.
sbfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1387   void sbfiz(const Register& rd,
1388              const Register& rn,
1389              unsigned lsb,
1390              unsigned width) {
1391     VIXL_ASSERT(width >= 1);
1392     VIXL_ASSERT(lsb + width <= rn.size());
1393     sbfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
1394   }
1395 
1396   // Signed bitfield extract.
sbfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1397   void sbfx(const Register& rd,
1398             const Register& rn,
1399             unsigned lsb,
1400             unsigned width) {
1401     VIXL_ASSERT(width >= 1);
1402     VIXL_ASSERT(lsb + width <= rn.size());
1403     sbfm(rd, rn, lsb, lsb + width - 1);
1404   }
1405 
1406   // Signed extend byte.
  void sxtb(const Register& rd, const Register& rn) {
    sbfm(rd, rn, 0, 7);  // Sign-extend bits <7:0> of rn into rd.
  }
1410 
1411   // Signed extend halfword.
  void sxth(const Register& rd, const Register& rn) {
    sbfm(rd, rn, 0, 15);  // Sign-extend bits <15:0> of rn into rd.
  }
1415 
1416   // Signed extend word.
  void sxtw(const Register& rd, const Register& rn) {
    sbfm(rd, rn, 0, 31);  // Sign-extend bits <31:0> of rn into rd.
  }
1420 
1421   // Ubfm aliases.
1422   // Logical shift left.
lsl(const Register & rd,const Register & rn,unsigned shift)1423   void lsl(const Register& rd, const Register& rn, unsigned shift) {
1424     unsigned reg_size = rd.size();
1425     VIXL_ASSERT(shift < reg_size);
1426     ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
1427   }
1428 
1429   // Logical shift right.
lsr(const Register & rd,const Register & rn,unsigned shift)1430   void lsr(const Register& rd, const Register& rn, unsigned shift) {
1431     VIXL_ASSERT(shift < rd.size());
1432     ubfm(rd, rn, shift, rd.size() - 1);
1433   }
1434 
1435   // Unsigned bitfield insert with zero at right.
ubfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1436   void ubfiz(const Register& rd,
1437              const Register& rn,
1438              unsigned lsb,
1439              unsigned width) {
1440     VIXL_ASSERT(width >= 1);
1441     VIXL_ASSERT(lsb + width <= rn.size());
1442     ubfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
1443   }
1444 
1445   // Unsigned bitfield extract.
ubfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1446   void ubfx(const Register& rd,
1447             const Register& rn,
1448             unsigned lsb,
1449             unsigned width) {
1450     VIXL_ASSERT(width >= 1);
1451     VIXL_ASSERT(lsb + width <= rn.size());
1452     ubfm(rd, rn, lsb, lsb + width - 1);
1453   }
1454 
1455   // Unsigned extend byte.
  void uxtb(const Register& rd, const Register& rn) {
    ubfm(rd, rn, 0, 7);  // Zero-extend bits <7:0> of rn into rd.
  }
1459 
1460   // Unsigned extend halfword.
  void uxth(const Register& rd, const Register& rn) {
    ubfm(rd, rn, 0, 15);  // Zero-extend bits <15:0> of rn into rd.
  }
1464 
1465   // Unsigned extend word.
  void uxtw(const Register& rd, const Register& rn) {
    ubfm(rd, rn, 0, 31);  // Zero-extend bits <31:0> of rn into rd.
  }
1469 
1470   // Extract.
1471   void extr(const Register& rd,
1472             const Register& rn,
1473             const Register& rm,
1474             unsigned lsb);
1475 
1476   // Conditional select: rd = cond ? rn : rm.
1477   void csel(const Register& rd,
1478             const Register& rn,
1479             const Register& rm,
1480             Condition cond);
1481 
1482   // Conditional select increment: rd = cond ? rn : rm + 1.
1483   void csinc(const Register& rd,
1484              const Register& rn,
1485              const Register& rm,
1486              Condition cond);
1487 
1488   // Conditional select inversion: rd = cond ? rn : ~rm.
1489   void csinv(const Register& rd,
1490              const Register& rn,
1491              const Register& rm,
1492              Condition cond);
1493 
1494   // Conditional select negation: rd = cond ? rn : -rm.
1495   void csneg(const Register& rd,
1496              const Register& rn,
1497              const Register& rm,
1498              Condition cond);
1499 
1500   // Conditional set: rd = cond ? 1 : 0.
1501   void cset(const Register& rd, Condition cond);
1502 
1503   // Conditional set mask: rd = cond ? -1 : 0.
1504   void csetm(const Register& rd, Condition cond);
1505 
1506   // Conditional increment: rd = cond ? rn + 1 : rn.
1507   void cinc(const Register& rd, const Register& rn, Condition cond);
1508 
1509   // Conditional invert: rd = cond ? ~rn : rn.
1510   void cinv(const Register& rd, const Register& rn, Condition cond);
1511 
1512   // Conditional negate: rd = cond ? -rn : rn.
1513   void cneg(const Register& rd, const Register& rn, Condition cond);
1514 
1515   // Rotate right.
  void ror(const Register& rd, const Register& rs, unsigned shift) {
    // Alias of extr with both source registers the same.
    extr(rd, rs, rs, shift);
  }
1519 
1520   // Conditional comparison.
1521   // Conditional compare negative.
1522   void ccmn(const Register& rn,
1523             const Operand& operand,
1524             StatusFlags nzcv,
1525             Condition cond);
1526 
1527   // Conditional compare.
1528   void ccmp(const Register& rn,
1529             const Operand& operand,
1530             StatusFlags nzcv,
1531             Condition cond);
1532 
1533   // CRC-32 checksum from byte.
1534   void crc32b(const Register& rd,
1535               const Register& rn,
1536               const Register& rm);
1537 
1538   // CRC-32 checksum from half-word.
1539   void crc32h(const Register& rd,
1540               const Register& rn,
1541               const Register& rm);
1542 
1543   // CRC-32 checksum from word.
1544   void crc32w(const Register& rd,
1545               const Register& rn,
1546               const Register& rm);
1547 
1548   // CRC-32 checksum from double word.
1549   void crc32x(const Register& rd,
1550               const Register& rn,
1551               const Register& rm);
1552 
1553   // CRC-32 C checksum from byte.
1554   void crc32cb(const Register& rd,
1555                const Register& rn,
1556                const Register& rm);
1557 
1558   // CRC-32 C checksum from half-word.
1559   void crc32ch(const Register& rd,
1560                const Register& rn,
1561                const Register& rm);
1562 
1563   // CRC-32 C checksum from word.
1564   void crc32cw(const Register& rd,
1565                const Register& rn,
1566                const Register& rm);
1567 
1568   // CRC-32C checksum from double word.
1569   void crc32cx(const Register& rd,
1570                const Register& rn,
1571                const Register& rm);
1572 
1573   // Multiply.
1574   void mul(const Register& rd, const Register& rn, const Register& rm);
1575 
1576   // Negated multiply.
1577   void mneg(const Register& rd, const Register& rn, const Register& rm);
1578 
1579   // Signed long multiply: 32 x 32 -> 64-bit.
1580   void smull(const Register& rd, const Register& rn, const Register& rm);
1581 
1582   // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
1583   void smulh(const Register& xd, const Register& xn, const Register& xm);
1584 
1585   // Multiply and accumulate.
1586   void madd(const Register& rd,
1587             const Register& rn,
1588             const Register& rm,
1589             const Register& ra);
1590 
1591   // Multiply and subtract.
1592   void msub(const Register& rd,
1593             const Register& rn,
1594             const Register& rm,
1595             const Register& ra);
1596 
1597   // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
1598   void smaddl(const Register& rd,
1599               const Register& rn,
1600               const Register& rm,
1601               const Register& ra);
1602 
1603   // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
1604   void umaddl(const Register& rd,
1605               const Register& rn,
1606               const Register& rm,
1607               const Register& ra);
1608 
1609   // Unsigned long multiply: 32 x 32 -> 64-bit.
  void umull(const Register& rd,
             const Register& rn,
             const Register& rm) {
    // Alias of umaddl with a zero (xzr) accumulator.
    umaddl(rd, rn, rm, xzr);
  }
1615 
1616   // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
1617   void umulh(const Register& xd,
1618              const Register& xn,
1619              const Register& xm);
1620 
1621   // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1622   void smsubl(const Register& rd,
1623               const Register& rn,
1624               const Register& rm,
1625               const Register& ra);
1626 
1627   // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1628   void umsubl(const Register& rd,
1629               const Register& rn,
1630               const Register& rm,
1631               const Register& ra);
1632 
1633   // Signed integer divide.
1634   void sdiv(const Register& rd, const Register& rn, const Register& rm);
1635 
1636   // Unsigned integer divide.
1637   void udiv(const Register& rd, const Register& rn, const Register& rm);
1638 
1639   // Bit reverse.
1640   void rbit(const Register& rd, const Register& rn);
1641 
1642   // Reverse bytes in 16-bit half words.
1643   void rev16(const Register& rd, const Register& rn);
1644 
1645   // Reverse bytes in 32-bit words.
1646   void rev32(const Register& rd, const Register& rn);
1647 
1648   // Reverse bytes.
1649   void rev(const Register& rd, const Register& rn);
1650 
1651   // Count leading zeroes.
1652   void clz(const Register& rd, const Register& rn);
1653 
1654   // Count leading sign bits.
1655   void cls(const Register& rd, const Register& rn);
1656 
1657   // Memory instructions.
1658   // Load integer or FP register.
1659   void ldr(const CPURegister& rt, const MemOperand& src,
1660            LoadStoreScalingOption option = PreferScaledOffset);
1661 
1662   // Store integer or FP register.
1663   void str(const CPURegister& rt, const MemOperand& dst,
1664            LoadStoreScalingOption option = PreferScaledOffset);
1665 
1666   // Load word with sign extension.
1667   void ldrsw(const Register& rt, const MemOperand& src,
1668              LoadStoreScalingOption option = PreferScaledOffset);
1669 
1670   // Load byte.
1671   void ldrb(const Register& rt, const MemOperand& src,
1672             LoadStoreScalingOption option = PreferScaledOffset);
1673 
1674   // Store byte.
1675   void strb(const Register& rt, const MemOperand& dst,
1676             LoadStoreScalingOption option = PreferScaledOffset);
1677 
1678   // Load byte with sign extension.
1679   void ldrsb(const Register& rt, const MemOperand& src,
1680              LoadStoreScalingOption option = PreferScaledOffset);
1681 
1682   // Load half-word.
1683   void ldrh(const Register& rt, const MemOperand& src,
1684             LoadStoreScalingOption option = PreferScaledOffset);
1685 
1686   // Store half-word.
1687   void strh(const Register& rt, const MemOperand& dst,
1688             LoadStoreScalingOption option = PreferScaledOffset);
1689 
1690   // Load half-word with sign extension.
1691   void ldrsh(const Register& rt, const MemOperand& src,
1692              LoadStoreScalingOption option = PreferScaledOffset);
1693 
1694   // Load integer or FP register (with unscaled offset).
1695   void ldur(const CPURegister& rt, const MemOperand& src,
1696             LoadStoreScalingOption option = PreferUnscaledOffset);
1697 
1698   // Store integer or FP register (with unscaled offset).
1699   void stur(const CPURegister& rt, const MemOperand& src,
1700             LoadStoreScalingOption option = PreferUnscaledOffset);
1701 
1702   // Load word with sign extension.
1703   void ldursw(const Register& rt, const MemOperand& src,
1704               LoadStoreScalingOption option = PreferUnscaledOffset);
1705 
1706   // Load byte (with unscaled offset).
1707   void ldurb(const Register& rt, const MemOperand& src,
1708              LoadStoreScalingOption option = PreferUnscaledOffset);
1709 
1710   // Store byte (with unscaled offset).
1711   void sturb(const Register& rt, const MemOperand& dst,
1712              LoadStoreScalingOption option = PreferUnscaledOffset);
1713 
1714   // Load byte with sign extension (and unscaled offset).
1715   void ldursb(const Register& rt, const MemOperand& src,
1716               LoadStoreScalingOption option = PreferUnscaledOffset);
1717 
1718   // Load half-word (with unscaled offset).
1719   void ldurh(const Register& rt, const MemOperand& src,
1720              LoadStoreScalingOption option = PreferUnscaledOffset);
1721 
1722   // Store half-word (with unscaled offset).
1723   void sturh(const Register& rt, const MemOperand& dst,
1724              LoadStoreScalingOption option = PreferUnscaledOffset);
1725 
1726   // Load half-word with sign extension (and unscaled offset).
1727   void ldursh(const Register& rt, const MemOperand& src,
1728               LoadStoreScalingOption option = PreferUnscaledOffset);
1729 
1730   // Load integer or FP register pair.
1731   void ldp(const CPURegister& rt, const CPURegister& rt2,
1732            const MemOperand& src);
1733 
1734   // Store integer or FP register pair.
1735   void stp(const CPURegister& rt, const CPURegister& rt2,
1736            const MemOperand& dst);
1737 
1738   // Load word pair with sign extension.
1739   void ldpsw(const Register& rt, const Register& rt2, const MemOperand& src);
1740 
1741   // Load integer or FP register pair, non-temporal.
1742   void ldnp(const CPURegister& rt, const CPURegister& rt2,
1743             const MemOperand& src);
1744 
1745   // Store integer or FP register pair, non-temporal.
1746   void stnp(const CPURegister& rt, const CPURegister& rt2,
1747             const MemOperand& dst);
1748 
1749   // Load integer or FP register from literal pool.
1750   void ldr(const CPURegister& rt, RawLiteral* literal);
1751 
1752   // Load word with sign extension from literal pool.
1753   void ldrsw(const Register& rt, RawLiteral* literal);
1754 
1755   // Load integer or FP register from pc + imm19 << 2.
1756   void ldr(const CPURegister& rt, int imm19);
1757 
1758   // Load word with sign extension from pc + imm19 << 2.
1759   void ldrsw(const Register& rt, int imm19);
1760 
1761   // Store exclusive byte.
1762   void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1763 
1764   // Store exclusive half-word.
1765   void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1766 
1767   // Store exclusive register.
1768   void stxr(const Register& rs, const Register& rt, const MemOperand& dst);
1769 
1770   // Load exclusive byte.
1771   void ldxrb(const Register& rt, const MemOperand& src);
1772 
1773   // Load exclusive half-word.
1774   void ldxrh(const Register& rt, const MemOperand& src);
1775 
1776   // Load exclusive register.
1777   void ldxr(const Register& rt, const MemOperand& src);
1778 
1779   // Store exclusive register pair.
1780   void stxp(const Register& rs,
1781             const Register& rt,
1782             const Register& rt2,
1783             const MemOperand& dst);
1784 
1785   // Load exclusive register pair.
1786   void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);
1787 
1788   // Store-release exclusive byte.
1789   void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1790 
1791   // Store-release exclusive half-word.
1792   void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1793 
1794   // Store-release exclusive register.
1795   void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);
1796 
1797   // Load-acquire exclusive byte.
1798   void ldaxrb(const Register& rt, const MemOperand& src);
1799 
1800   // Load-acquire exclusive half-word.
1801   void ldaxrh(const Register& rt, const MemOperand& src);
1802 
1803   // Load-acquire exclusive register.
1804   void ldaxr(const Register& rt, const MemOperand& src);
1805 
1806   // Store-release exclusive register pair.
1807   void stlxp(const Register& rs,
1808              const Register& rt,
1809              const Register& rt2,
1810              const MemOperand& dst);
1811 
1812   // Load-acquire exclusive register pair.
1813   void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);
1814 
1815   // Store-release byte.
1816   void stlrb(const Register& rt, const MemOperand& dst);
1817 
1818   // Store-release half-word.
1819   void stlrh(const Register& rt, const MemOperand& dst);
1820 
1821   // Store-release register.
1822   void stlr(const Register& rt, const MemOperand& dst);
1823 
1824   // Load-acquire byte.
1825   void ldarb(const Register& rt, const MemOperand& src);
1826 
1827   // Load-acquire half-word.
1828   void ldarh(const Register& rt, const MemOperand& src);
1829 
1830   // Load-acquire register.
1831   void ldar(const Register& rt, const MemOperand& src);
1832 
1833   // Prefetch memory.
1834   void prfm(PrefetchOperation op, const MemOperand& addr,
1835             LoadStoreScalingOption option = PreferScaledOffset);
1836 
1837   // Prefetch memory (with unscaled offset).
1838   void prfum(PrefetchOperation op, const MemOperand& addr,
1839              LoadStoreScalingOption option = PreferUnscaledOffset);
1840 
1841   // Prefetch memory in the literal pool.
1842   void prfm(PrefetchOperation op, RawLiteral* literal);
1843 
1844   // Prefetch from pc + imm19 << 2.
1845   void prfm(PrefetchOperation op, int imm19);
1846 
1847   // Move instructions. The default shift of -1 indicates that the move
1848   // instruction will calculate an appropriate 16-bit immediate and left shift
1849   // that is equal to the 64-bit immediate argument. If an explicit left shift
1850   // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
1851   //
1852   // For movk, an explicit shift can be used to indicate which half word should
1853   // be overwritten, eg. movk(x0, 0, 0) will overwrite the least-significant
1854   // half word with zero, whereas movk(x0, 0, 48) will overwrite the
1855   // most-significant.
1856 
1857   // Move wide with keep (MOVK): overwrite one 16-bit half-word of rd with imm, leaving the other bits unchanged; shift = -1 derives the half-word position from imm (see note above).
1858   void movk(const Register& rd, uint64_t imm, int shift = -1) {
1859     MoveWide(rd, imm, shift, MOVK);
1860   }
1861 
1862   // Move wide with NOT (MOVN): write the bitwise inverse of the shifted 16-bit immediate to rd; shift = -1 picks a suitable encoding for the 64-bit imm (see note above).
1863   void movn(const Register& rd, uint64_t imm, int shift = -1) {
1864     MoveWide(rd, imm, shift, MOVN);
1865   }
1866 
1867   // Move wide with zero (MOVZ): write the shifted 16-bit immediate to rd, clearing the other bits; shift = -1 computes the shift from imm (see note above).
1868   void movz(const Register& rd, uint64_t imm, int shift = -1) {
1869     MoveWide(rd, imm, shift, MOVZ);
1870   }
1871 
1872   // Misc instructions.
1873   // Monitor debug-mode breakpoint.
1874   void brk(int code);
1875 
1876   // Halting debug-mode breakpoint.
1877   void hlt(int code);
1878 
1879   // Generate exception targeting EL1.
1880   void svc(int code);
1881 
1882   // Move register to register.
1883   void mov(const Register& rd, const Register& rn);
1884 
1885   // Move inverted operand to register.
1886   void mvn(const Register& rd, const Operand& operand);
1887 
1888   // System instructions.
1889   // Move to register from system register.
1890   void mrs(const Register& rt, SystemRegister sysreg);
1891 
1892   // Move from register to system register.
1893   void msr(SystemRegister sysreg, const Register& rt);
1894 
1895   // System instruction.
1896   void sys(int op1, int crn, int crm, int op2, const Register& rt = xzr);
1897 
1898   // System instruction with pre-encoded op (op1:crn:crm:op2).
1899   void sys(int op, const Register& rt = xzr);
1900 
1901   // System data cache operation.
1902   void dc(DataCacheOp op, const Register& rt);
1903 
1904   // System instruction cache operation.
1905   void ic(InstructionCacheOp op, const Register& rt);
1906 
1907   // System hint.
1908   void hint(SystemHint code);
1909 
1910   // Clear exclusive monitor.
1911   void clrex(int imm4 = 0xf);
1912 
1913   // Data memory barrier.
1914   void dmb(BarrierDomain domain, BarrierType type);
1915 
1916   // Data synchronization barrier.
1917   void dsb(BarrierDomain domain, BarrierType type);
1918 
1919   // Instruction synchronization barrier.
1920   void isb();
1921 
1922   // Alias for system instructions.
1923   // No-op: emitted as a HINT with the NOP operand.
1924   void nop() {
1925     hint(NOP);
1926   }
1927 
1928   // FP and NEON instructions.
1929   // Move double precision immediate to FP register.
1930   void fmov(const VRegister& vd, double imm);
1931 
1932   // Move single precision immediate to FP register.
1933   void fmov(const VRegister& vd, float imm);
1934 
1935   // Move FP register to register.
1936   void fmov(const Register& rd, const VRegister& fn);
1937 
1938   // Move register to FP register.
1939   void fmov(const VRegister& vd, const Register& rn);
1940 
1941   // Move FP register to FP register.
1942   void fmov(const VRegister& vd, const VRegister& fn);
1943 
1944   // Move 64-bit register to top half of 128-bit FP register.
1945   void fmov(const VRegister& vd, int index, const Register& rn);
1946 
1947   // Move top half of 128-bit FP register to 64-bit register.
1948   void fmov(const Register& rd, const VRegister& vn, int index);
1949 
1950   // FP add.
1951   void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1952 
1953   // FP subtract.
1954   void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1955 
1956   // FP multiply.
1957   void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1958 
1959   // FP fused multiply-add.
1960   void fmadd(const VRegister& vd,
1961              const VRegister& vn,
1962              const VRegister& vm,
1963              const VRegister& va);
1964 
1965   // FP fused multiply-subtract.
1966   void fmsub(const VRegister& vd,
1967              const VRegister& vn,
1968              const VRegister& vm,
1969              const VRegister& va);
1970 
1971   // FP fused multiply-add and negate.
1972   void fnmadd(const VRegister& vd,
1973               const VRegister& vn,
1974               const VRegister& vm,
1975               const VRegister& va);
1976 
1977   // FP fused multiply-subtract and negate.
1978   void fnmsub(const VRegister& vd,
1979               const VRegister& vn,
1980               const VRegister& vm,
1981               const VRegister& va);
1982 
1983   // FP multiply-negate scalar.
1984   void fnmul(const VRegister& vd,
1985              const VRegister& vn,
1986              const VRegister& vm);
1987 
1988   // FP reciprocal exponent scalar.
1989   void frecpx(const VRegister& vd,
1990               const VRegister& vn);
1991 
1992   // FP divide.
1993   void fdiv(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1994 
1995   // FP maximum.
1996   void fmax(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1997 
1998   // FP minimum.
1999   void fmin(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2000 
2001   // FP maximum number.
2002   void fmaxnm(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2003 
2004   // FP minimum number.
2005   void fminnm(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2006 
2007   // FP absolute.
2008   void fabs(const VRegister& vd, const VRegister& vn);
2009 
2010   // FP negate.
2011   void fneg(const VRegister& vd, const VRegister& vn);
2012 
2013   // FP square root.
2014   void fsqrt(const VRegister& vd, const VRegister& vn);
2015 
2016   // FP round to integer, nearest with ties to away.
2017   void frinta(const VRegister& vd, const VRegister& vn);
2018 
2019   // FP round to integer, implicit rounding.
2020   void frinti(const VRegister& vd, const VRegister& vn);
2021 
2022   // FP round to integer, toward minus infinity.
2023   void frintm(const VRegister& vd, const VRegister& vn);
2024 
2025   // FP round to integer, nearest with ties to even.
2026   void frintn(const VRegister& vd, const VRegister& vn);
2027 
2028   // FP round to integer, toward plus infinity.
2029   void frintp(const VRegister& vd, const VRegister& vn);
2030 
2031   // FP round to integer, exact, implicit rounding.
2032   void frintx(const VRegister& vd, const VRegister& vn);
2033 
2034   // FP round to integer, towards zero.
2035   void frintz(const VRegister& vd, const VRegister& vn);
2036 
2037   void FPCompareMacro(const VRegister& vn,
2038                       double value,
2039                       FPTrapFlags trap);
2040 
2041   void FPCompareMacro(const VRegister& vn,
2042                       const VRegister& vm,
2043                       FPTrapFlags trap);
2044 
2045   // FP compare registers.
2046   void fcmp(const VRegister& vn, const VRegister& vm);
2047 
2048   // FP compare immediate.
2049   void fcmp(const VRegister& vn, double value);
2050 
2051   void FPCCompareMacro(const VRegister& vn,
2052                        const VRegister& vm,
2053                        StatusFlags nzcv,
2054                        Condition cond,
2055                        FPTrapFlags trap);
2056 
2057   // FP conditional compare.
2058   void fccmp(const VRegister& vn,
2059              const VRegister& vm,
2060              StatusFlags nzcv,
2061              Condition cond);
2062 
2063   // FP signaling compare registers.
2064   void fcmpe(const VRegister& vn, const VRegister& vm);
2065 
2066   // FP signaling compare immediate.
2067   void fcmpe(const VRegister& vn, double value);
2068 
2069   // FP conditional signaling compare.
2070   void fccmpe(const VRegister& vn,
2071               const VRegister& vm,
2072               StatusFlags nzcv,
2073               Condition cond);
2074 
2075   // FP conditional select.
2076   void fcsel(const VRegister& vd,
2077              const VRegister& vn,
2078              const VRegister& vm,
2079              Condition cond);
2080 
2081   // Common FP Convert functions.
2082   void NEONFPConvertToInt(const Register& rd,
2083                           const VRegister& vn,
2084                           Instr op);
2085   void NEONFPConvertToInt(const VRegister& vd,
2086                           const VRegister& vn,
2087                           Instr op);
2088 
2089   // FP convert between precisions.
2090   void fcvt(const VRegister& vd, const VRegister& vn);
2091 
2092   // FP convert to higher precision.
2093   void fcvtl(const VRegister& vd, const VRegister& vn);
2094 
2095   // FP convert to higher precision (second part).
2096   void fcvtl2(const VRegister& vd, const VRegister& vn);
2097 
2098   // FP convert to lower precision.
2099   void fcvtn(const VRegister& vd, const VRegister& vn);
2100 
2101   // FP convert to lower precision (second part).
2102   void fcvtn2(const VRegister& vd, const VRegister& vn);
2103 
2104   // FP convert to lower precision, rounding to odd.
2105   void fcvtxn(const VRegister& vd, const VRegister& vn);
2106 
2107   // FP convert to lower precision, rounding to odd (second part).
2108   void fcvtxn2(const VRegister& vd, const VRegister& vn);
2109 
2110   // FP convert to signed integer, nearest with ties to away.
2111   void fcvtas(const Register& rd, const VRegister& vn);
2112 
2113   // FP convert to unsigned integer, nearest with ties to away.
2114   void fcvtau(const Register& rd, const VRegister& vn);
2115 
2116   // FP convert to signed integer, nearest with ties to away.
2117   void fcvtas(const VRegister& vd, const VRegister& vn);
2118 
2119   // FP convert to unsigned integer, nearest with ties to away.
2120   void fcvtau(const VRegister& vd, const VRegister& vn);
2121 
2122   // FP convert to signed integer, round towards -infinity.
2123   void fcvtms(const Register& rd, const VRegister& vn);
2124 
2125   // FP convert to unsigned integer, round towards -infinity.
2126   void fcvtmu(const Register& rd, const VRegister& vn);
2127 
2128   // FP convert to signed integer, round towards -infinity.
2129   void fcvtms(const VRegister& vd, const VRegister& vn);
2130 
2131   // FP convert to unsigned integer, round towards -infinity.
2132   void fcvtmu(const VRegister& vd, const VRegister& vn);
2133 
2134   // FP convert to signed integer, nearest with ties to even.
2135   void fcvtns(const Register& rd, const VRegister& vn);
2136 
2137   // FP convert to unsigned integer, nearest with ties to even.
2138   void fcvtnu(const Register& rd, const VRegister& vn);
2139 
2140   // FP convert to signed integer, nearest with ties to even.
2141   void fcvtns(const VRegister& vd, const VRegister& vn);
2142 
2143   // FP convert to unsigned integer, nearest with ties to even.
2144   void fcvtnu(const VRegister& vd, const VRegister& vn);
2145 
2146   // FP convert to signed integer or fixed-point, round towards zero.
2147   void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);
2148 
2149   // FP convert to unsigned integer or fixed-point, round towards zero.
2150   void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);
2151 
2152   // FP convert to signed integer or fixed-point, round towards zero.
2153   void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);
2154 
2155   // FP convert to unsigned integer or fixed-point, round towards zero.
2156   void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);
2157 
2158   // FP convert to signed integer, round towards +infinity.
2159   void fcvtps(const Register& rd, const VRegister& vn);
2160 
2161   // FP convert to unsigned integer, round towards +infinity.
2162   void fcvtpu(const Register& rd, const VRegister& vn);
2163 
2164   // FP convert to signed integer, round towards +infinity.
2165   void fcvtps(const VRegister& vd, const VRegister& vn);
2166 
2167   // FP convert to unsigned integer, round towards +infinity.
2168   void fcvtpu(const VRegister& vd, const VRegister& vn);
2169 
2170   // Convert signed integer or fixed point to FP.
2171   void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2172 
2173   // Convert unsigned integer or fixed point to FP.
2174   void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2175 
2176   // Convert signed integer or fixed-point to FP.
2177   void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2178 
2179   // Convert unsigned integer or fixed-point to FP.
2180   void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2181 
2182   // Unsigned absolute difference.
2183   void uabd(const VRegister& vd,
2184             const VRegister& vn,
2185             const VRegister& vm);
2186 
2187   // Signed absolute difference.
2188   void sabd(const VRegister& vd,
2189             const VRegister& vn,
2190             const VRegister& vm);
2191 
2192   // Unsigned absolute difference and accumulate.
2193   void uaba(const VRegister& vd,
2194             const VRegister& vn,
2195             const VRegister& vm);
2196 
2197   // Signed absolute difference and accumulate.
2198   void saba(const VRegister& vd,
2199             const VRegister& vn,
2200             const VRegister& vm);
2201 
2202   // Add.
2203   void add(const VRegister& vd,
2204            const VRegister& vn,
2205            const VRegister& vm);
2206 
2207   // Subtract.
2208   void sub(const VRegister& vd,
2209            const VRegister& vn,
2210            const VRegister& vm);
2211 
2212   // Unsigned halving add.
2213   void uhadd(const VRegister& vd,
2214              const VRegister& vn,
2215              const VRegister& vm);
2216 
2217   // Signed halving add.
2218   void shadd(const VRegister& vd,
2219              const VRegister& vn,
2220              const VRegister& vm);
2221 
2222   // Unsigned rounding halving add.
2223   void urhadd(const VRegister& vd,
2224               const VRegister& vn,
2225               const VRegister& vm);
2226 
2227   // Signed rounding halving add.
2228   void srhadd(const VRegister& vd,
2229               const VRegister& vn,
2230               const VRegister& vm);
2231 
2232   // Unsigned halving sub.
2233   void uhsub(const VRegister& vd,
2234              const VRegister& vn,
2235              const VRegister& vm);
2236 
2237   // Signed halving sub.
2238   void shsub(const VRegister& vd,
2239              const VRegister& vn,
2240              const VRegister& vm);
2241 
2242   // Unsigned saturating add.
2243   void uqadd(const VRegister& vd,
2244              const VRegister& vn,
2245              const VRegister& vm);
2246 
2247   // Signed saturating add.
2248   void sqadd(const VRegister& vd,
2249              const VRegister& vn,
2250              const VRegister& vm);
2251 
2252   // Unsigned saturating subtract.
2253   void uqsub(const VRegister& vd,
2254              const VRegister& vn,
2255              const VRegister& vm);
2256 
2257   // Signed saturating subtract.
2258   void sqsub(const VRegister& vd,
2259              const VRegister& vn,
2260              const VRegister& vm);
2261 
2262   // Add pairwise.
2263   void addp(const VRegister& vd,
2264             const VRegister& vn,
2265             const VRegister& vm);
2266 
2267   // Add pair of elements scalar.
2268   void addp(const VRegister& vd,
2269             const VRegister& vn);
2270 
2271   // Multiply-add to accumulator.
2272   void mla(const VRegister& vd,
2273            const VRegister& vn,
2274            const VRegister& vm);
2275 
2276   // Multiply-subtract to accumulator.
2277   void mls(const VRegister& vd,
2278            const VRegister& vn,
2279            const VRegister& vm);
2280 
2281   // Multiply.
2282   void mul(const VRegister& vd,
2283            const VRegister& vn,
2284            const VRegister& vm);
2285 
2286   // Multiply by scalar element.
2287   void mul(const VRegister& vd,
2288            const VRegister& vn,
2289            const VRegister& vm,
2290            int vm_index);
2291 
2292   // Multiply-add by scalar element.
2293   void mla(const VRegister& vd,
2294            const VRegister& vn,
2295            const VRegister& vm,
2296            int vm_index);
2297 
2298   // Multiply-subtract by scalar element.
2299   void mls(const VRegister& vd,
2300            const VRegister& vn,
2301            const VRegister& vm,
2302            int vm_index);
2303 
2304   // Signed long multiply-add by scalar element.
2305   void smlal(const VRegister& vd,
2306              const VRegister& vn,
2307              const VRegister& vm,
2308              int vm_index);
2309 
2310   // Signed long multiply-add by scalar element (second part).
2311   void smlal2(const VRegister& vd,
2312               const VRegister& vn,
2313               const VRegister& vm,
2314               int vm_index);
2315 
2316   // Unsigned long multiply-add by scalar element.
2317   void umlal(const VRegister& vd,
2318              const VRegister& vn,
2319              const VRegister& vm,
2320              int vm_index);
2321 
2322   // Unsigned long multiply-add by scalar element (second part).
2323   void umlal2(const VRegister& vd,
2324               const VRegister& vn,
2325               const VRegister& vm,
2326               int vm_index);
2327 
2328   // Signed long multiply-sub by scalar element.
2329   void smlsl(const VRegister& vd,
2330              const VRegister& vn,
2331              const VRegister& vm,
2332              int vm_index);
2333 
2334   // Signed long multiply-sub by scalar element (second part).
2335   void smlsl2(const VRegister& vd,
2336               const VRegister& vn,
2337               const VRegister& vm,
2338               int vm_index);
2339 
2340   // Unsigned long multiply-sub by scalar element.
2341   void umlsl(const VRegister& vd,
2342              const VRegister& vn,
2343              const VRegister& vm,
2344              int vm_index);
2345 
2346   // Unsigned long multiply-sub by scalar element (second part).
2347   void umlsl2(const VRegister& vd,
2348               const VRegister& vn,
2349               const VRegister& vm,
2350               int vm_index);
2351 
2352   // Signed long multiply by scalar element.
2353   void smull(const VRegister& vd,
2354              const VRegister& vn,
2355              const VRegister& vm,
2356              int vm_index);
2357 
2358   // Signed long multiply by scalar element (second part).
2359   void smull2(const VRegister& vd,
2360               const VRegister& vn,
2361               const VRegister& vm,
2362               int vm_index);
2363 
2364   // Unsigned long multiply by scalar element.
2365   void umull(const VRegister& vd,
2366              const VRegister& vn,
2367              const VRegister& vm,
2368              int vm_index);
2369 
2370   // Unsigned long multiply by scalar element (second part).
2371   void umull2(const VRegister& vd,
2372               const VRegister& vn,
2373               const VRegister& vm,
2374               int vm_index);
2375 
2376   // Signed saturating double long multiply by element.
2377   void sqdmull(const VRegister& vd,
2378                const VRegister& vn,
2379                const VRegister& vm,
2380                int vm_index);
2381 
2382   // Signed saturating double long multiply by element (second part).
2383   void sqdmull2(const VRegister& vd,
2384                 const VRegister& vn,
2385                 const VRegister& vm,
2386                 int vm_index);
2387 
2388   // Signed saturating doubling long multiply-add by element.
2389   void sqdmlal(const VRegister& vd,
2390                const VRegister& vn,
2391                const VRegister& vm,
2392                int vm_index);
2393 
2394   // Signed saturating doubling long multiply-add by element (second part).
2395   void sqdmlal2(const VRegister& vd,
2396                 const VRegister& vn,
2397                 const VRegister& vm,
2398                 int vm_index);
2399 
2400   // Signed saturating doubling long multiply-sub by element.
2401   void sqdmlsl(const VRegister& vd,
2402                const VRegister& vn,
2403                const VRegister& vm,
2404                int vm_index);
2405 
2406   // Signed saturating doubling long multiply-sub by element (second part).
2407   void sqdmlsl2(const VRegister& vd,
2408                 const VRegister& vn,
2409                 const VRegister& vm,
2410                 int vm_index);
2411 
2412   // Compare equal.
2413   void cmeq(const VRegister& vd,
2414             const VRegister& vn,
2415             const VRegister& vm);
2416 
2417   // Compare signed greater than or equal.
2418   void cmge(const VRegister& vd,
2419             const VRegister& vn,
2420             const VRegister& vm);
2421 
2422   // Compare signed greater than.
2423   void cmgt(const VRegister& vd,
2424             const VRegister& vn,
2425             const VRegister& vm);
2426 
2427   // Compare unsigned higher.
2428   void cmhi(const VRegister& vd,
2429             const VRegister& vn,
2430             const VRegister& vm);
2431 
2432   // Compare unsigned higher or same.
2433   void cmhs(const VRegister& vd,
2434             const VRegister& vn,
2435             const VRegister& vm);
2436 
2437   // Compare bitwise test bits nonzero.
2438   void cmtst(const VRegister& vd,
2439              const VRegister& vn,
2440              const VRegister& vm);
2441 
2442   // Compare bitwise to zero.
2443   void cmeq(const VRegister& vd,
2444             const VRegister& vn,
2445             int value);
2446 
2447   // Compare signed greater than or equal to zero.
2448   void cmge(const VRegister& vd,
2449             const VRegister& vn,
2450             int value);
2451 
2452   // Compare signed greater than zero.
2453   void cmgt(const VRegister& vd,
2454             const VRegister& vn,
2455             int value);
2456 
2457   // Compare signed less than or equal to zero.
2458   void cmle(const VRegister& vd,
2459             const VRegister& vn,
2460             int value);
2461 
2462   // Compare signed less than zero.
2463   void cmlt(const VRegister& vd,
2464             const VRegister& vn,
2465             int value);
2466 
2467   // Signed shift left by register.
2468   void sshl(const VRegister& vd,
2469             const VRegister& vn,
2470             const VRegister& vm);
2471 
2472   // Unsigned shift left by register.
2473   void ushl(const VRegister& vd,
2474             const VRegister& vn,
2475             const VRegister& vm);
2476 
2477   // Signed saturating shift left by register.
2478   void sqshl(const VRegister& vd,
2479              const VRegister& vn,
2480              const VRegister& vm);
2481 
2482   // Unsigned saturating shift left by register.
2483   void uqshl(const VRegister& vd,
2484              const VRegister& vn,
2485              const VRegister& vm);
2486 
2487   // Signed rounding shift left by register.
2488   void srshl(const VRegister& vd,
2489              const VRegister& vn,
2490              const VRegister& vm);
2491 
2492   // Unsigned rounding shift left by register.
2493   void urshl(const VRegister& vd,
2494              const VRegister& vn,
2495              const VRegister& vm);
2496 
2497   // Signed saturating rounding shift left by register.
2498   void sqrshl(const VRegister& vd,
2499               const VRegister& vn,
2500               const VRegister& vm);
2501 
2502   // Unsigned saturating rounding shift left by register.
2503   void uqrshl(const VRegister& vd,
2504               const VRegister& vn,
2505               const VRegister& vm);
2506 
2507   // Bitwise and.
2508   void and_(const VRegister& vd,
2509             const VRegister& vn,
2510             const VRegister& vm);
2511 
2512   // Bitwise or.
2513   void orr(const VRegister& vd,
2514            const VRegister& vn,
2515            const VRegister& vm);
2516 
2517   // Bitwise or immediate.
2518   void orr(const VRegister& vd,
2519            const int imm8,
2520            const int left_shift = 0);
2521 
2522   // Move register to register.
2523   void mov(const VRegister& vd,
2524            const VRegister& vn);
2525 
2526   // Bitwise orn.
2527   void orn(const VRegister& vd,
2528            const VRegister& vn,
2529            const VRegister& vm);
2530 
2531   // Bitwise eor.
2532   void eor(const VRegister& vd,
2533            const VRegister& vn,
2534            const VRegister& vm);
2535 
2536   // Bit clear immediate.
2537   void bic(const VRegister& vd,
2538            const int imm8,
2539            const int left_shift = 0);
2540 
2541   // Bit clear.
2542   void bic(const VRegister& vd,
2543            const VRegister& vn,
2544            const VRegister& vm);
2545 
2546   // Bitwise insert if false.
2547   void bif(const VRegister& vd,
2548            const VRegister& vn,
2549            const VRegister& vm);
2550 
2551   // Bitwise insert if true.
2552   void bit(const VRegister& vd,
2553            const VRegister& vn,
2554            const VRegister& vm);
2555 
2556   // Bitwise select.
2557   void bsl(const VRegister& vd,
2558            const VRegister& vn,
2559            const VRegister& vm);
2560 
2561   // Polynomial multiply.
2562   void pmul(const VRegister& vd,
2563             const VRegister& vn,
2564             const VRegister& vm);
2565 
  // Vector move immediate.
  void movi(const VRegister& vd,
            const uint64_t imm,
            Shift shift = LSL,
            const int shift_amount = 0);

  // Bitwise not.
  void mvn(const VRegister& vd,
           const VRegister& vn);

  // Vector move inverted immediate.
  void mvni(const VRegister& vd,
            const int imm8,
            Shift shift = LSL,
            const int shift_amount = 0);

  // Signed saturating accumulate of unsigned value.
  void suqadd(const VRegister& vd,
              const VRegister& vn);

  // Unsigned saturating accumulate of signed value.
  void usqadd(const VRegister& vd,
              const VRegister& vn);

  // Absolute value.
  void abs(const VRegister& vd,
           const VRegister& vn);

  // Signed saturating absolute value.
  void sqabs(const VRegister& vd,
             const VRegister& vn);

  // Negate.
  void neg(const VRegister& vd,
           const VRegister& vn);

  // Signed saturating negate.
  void sqneg(const VRegister& vd,
             const VRegister& vn);

  // Bitwise not. Named not_ because `not` is a reserved alternative token
  // in C++.
  void not_(const VRegister& vd,
            const VRegister& vn);
2609 
  // Extract narrow.
  void xtn(const VRegister& vd,
           const VRegister& vn);

  // Extract narrow (second part). As for all "2" variants of narrowing
  // instructions, the result is written to the upper half of the destination.
  void xtn2(const VRegister& vd,
            const VRegister& vn);

  // Signed saturating extract narrow.
  void sqxtn(const VRegister& vd,
             const VRegister& vn);

  // Signed saturating extract narrow (second part).
  void sqxtn2(const VRegister& vd,
              const VRegister& vn);

  // Unsigned saturating extract narrow.
  void uqxtn(const VRegister& vd,
             const VRegister& vn);

  // Unsigned saturating extract narrow (second part).
  void uqxtn2(const VRegister& vd,
              const VRegister& vn);

  // Signed saturating extract unsigned narrow.
  void sqxtun(const VRegister& vd,
              const VRegister& vn);

  // Signed saturating extract unsigned narrow (second part).
  void sqxtun2(const VRegister& vd,
               const VRegister& vn);

  // Extract vector from pair of vectors, starting at byte `index`.
  void ext(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int index);
2647 
  // Duplicate vector element to vector or scalar.
  void dup(const VRegister& vd,
           const VRegister& vn,
           int vn_index);

  // Move vector element to scalar (alias of dup).
  void mov(const VRegister& vd,
           const VRegister& vn,
           int vn_index);

  // Duplicate general-purpose register to vector.
  void dup(const VRegister& vd,
           const Register& rn);

  // Insert vector element from another vector element.
  void ins(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Move vector element to another vector element (alias of ins).
  void mov(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Insert vector element from general-purpose register.
  void ins(const VRegister& vd,
           int vd_index,
           const Register& rn);

  // Move general-purpose register to a vector element (alias of ins).
  void mov(const VRegister& vd,
           int vd_index,
           const Register& rn);

  // Unsigned move vector element to general-purpose register.
  void umov(const Register& rd,
            const VRegister& vn,
            int vn_index);

  // Move vector element to general-purpose register (alias of umov).
  void mov(const Register& rd,
           const VRegister& vn,
           int vn_index);

  // Signed move vector element to general-purpose register.
  void smov(const Register& rd,
            const VRegister& vn,
            int vn_index);
2698 
  // One-element structure load to one register.
  // NOTE(review): for the multi-register ld1/ld2/ld3/ld4 forms below, vt..vt4
  // presumably must be consecutive registers — verify against the definitions.
  void ld1(const VRegister& vt,
           const MemOperand& src);

  // One-element structure load to two registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const MemOperand& src);

  // One-element structure load to three registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // One-element structure load to four registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // One-element single structure load to one lane.
  void ld1(const VRegister& vt,
           int lane,
           const MemOperand& src);

  // One-element single structure load to all lanes.
  void ld1r(const VRegister& vt,
            const MemOperand& src);

  // Two-element structure load.
  void ld2(const VRegister& vt,
           const VRegister& vt2,
           const MemOperand& src);

  // Two-element single structure load to one lane.
  void ld2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& src);

  // Two-element single structure load to all lanes.
  void ld2r(const VRegister& vt,
            const VRegister& vt2,
            const MemOperand& src);

  // Three-element structure load.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure load to one lane.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& src);

  // Three-element single structure load to all lanes.
  void ld3r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const MemOperand& src);

  // Four-element structure load.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // Four-element single structure load to one lane.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& src);

  // Four-element single structure load to all lanes.
  void ld4r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const VRegister& vt4,
            const MemOperand& src);
2786 
  // Count leading sign bits (vector).
  void cls(const VRegister& vd,
           const VRegister& vn);

  // Count leading zero bits (vector).
  void clz(const VRegister& vd,
           const VRegister& vn);

  // Population count per byte (vector).
  void cnt(const VRegister& vd,
           const VRegister& vn);

  // Reverse bit order.
  void rbit(const VRegister& vd,
            const VRegister& vn);

  // Reverse elements in 16-bit halfwords.
  void rev16(const VRegister& vd,
             const VRegister& vn);

  // Reverse elements in 32-bit words.
  void rev32(const VRegister& vd,
             const VRegister& vn);

  // Reverse elements in 64-bit doublewords.
  void rev64(const VRegister& vd,
             const VRegister& vn);

  // Unsigned reciprocal square root estimate.
  void ursqrte(const VRegister& vd,
               const VRegister& vn);

  // Unsigned reciprocal estimate.
  void urecpe(const VRegister& vd,
              const VRegister& vn);

  // Signed pairwise long add.
  void saddlp(const VRegister& vd,
              const VRegister& vn);

  // Unsigned pairwise long add.
  void uaddlp(const VRegister& vd,
              const VRegister& vn);

  // Signed pairwise long add and accumulate.
  void sadalp(const VRegister& vd,
              const VRegister& vn);

  // Unsigned pairwise long add and accumulate.
  void uadalp(const VRegister& vd,
              const VRegister& vn);
2838 
  // Shift left by immediate.
  void shl(const VRegister& vd,
           const VRegister& vn,
           int shift);

  // Signed saturating shift left by immediate.
  void sqshl(const VRegister& vd,
             const VRegister& vn,
             int shift);

  // Signed saturating shift left unsigned by immediate.
  void sqshlu(const VRegister& vd,
              const VRegister& vn,
              int shift);

  // Unsigned saturating shift left by immediate.
  void uqshl(const VRegister& vd,
             const VRegister& vn,
             int shift);

  // Signed shift left long by immediate.
  void sshll(const VRegister& vd,
             const VRegister& vn,
             int shift);

  // Signed shift left long by immediate (second part).
  void sshll2(const VRegister& vd,
              const VRegister& vn,
              int shift);

  // Signed extend long (alias of sshll with a zero shift).
  void sxtl(const VRegister& vd,
            const VRegister& vn);

  // Signed extend long (second part).
  void sxtl2(const VRegister& vd,
             const VRegister& vn);

  // Unsigned shift left long by immediate.
  void ushll(const VRegister& vd,
             const VRegister& vn,
             int shift);

  // Unsigned shift left long by immediate (second part).
  void ushll2(const VRegister& vd,
              const VRegister& vn,
              int shift);

  // Shift left long by element size.
  void shll(const VRegister& vd,
            const VRegister& vn,
            int shift);

  // Shift left long by element size (second part).
  void shll2(const VRegister& vd,
             const VRegister& vn,
             int shift);

  // Unsigned extend long (alias of ushll with a zero shift).
  void uxtl(const VRegister& vd,
            const VRegister& vn);

  // Unsigned extend long (second part).
  void uxtl2(const VRegister& vd,
             const VRegister& vn);

  // Shift left by immediate and insert.
  void sli(const VRegister& vd,
           const VRegister& vn,
           int shift);

  // Shift right by immediate and insert.
  void sri(const VRegister& vd,
           const VRegister& vn,
           int shift);
2914 
  // Signed maximum.
  void smax(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm);

  // Signed pairwise maximum.
  void smaxp(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Add across vector.
  void addv(const VRegister& vd,
            const VRegister& vn);

  // Signed add long across vector.
  void saddlv(const VRegister& vd,
              const VRegister& vn);

  // Unsigned add long across vector.
  void uaddlv(const VRegister& vd,
              const VRegister& vn);

  // FP maximum number across vector.
  void fmaxnmv(const VRegister& vd,
               const VRegister& vn);

  // FP maximum across vector.
  void fmaxv(const VRegister& vd,
             const VRegister& vn);

  // FP minimum number across vector.
  void fminnmv(const VRegister& vd,
               const VRegister& vn);

  // FP minimum across vector.
  void fminv(const VRegister& vd,
             const VRegister& vn);

  // Signed maximum across vector.
  void smaxv(const VRegister& vd,
             const VRegister& vn);

  // Signed minimum.
  void smin(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm);

  // Signed pairwise minimum.
  void sminp(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Signed minimum across vector.
  void sminv(const VRegister& vd,
             const VRegister& vn);
2970 
  // One-element structure store from one register.
  // NOTE(review): in the st1/st2/st3/st4 declarations below the MemOperand is
  // the store destination, despite the parameter being named `src`.
  void st1(const VRegister& vt,
           const MemOperand& src);

  // One-element structure store from two registers.
  void st1(const VRegister& vt,
           const VRegister& vt2,
           const MemOperand& src);

  // One-element structure store from three registers.
  void st1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // One-element structure store from four registers.
  void st1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // One-element single structure store from one lane.
  void st1(const VRegister& vt,
           int lane,
           const MemOperand& src);

  // Two-element structure store from two registers.
  void st2(const VRegister& vt,
           const VRegister& vt2,
           const MemOperand& src);

  // Two-element single structure store from two lanes.
  void st2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& src);

  // Three-element structure store from three registers.
  void st3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure store from three lanes.
  void st3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& src);

  // Four-element structure store from four registers.
  void st4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // Four-element single structure store from four lanes.
  void st4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& src);
3036 
  // Unsigned add long. ("Long" forms produce elements twice the width of
  // their sources; "wide" forms take one wide and one narrow source.)
  void uaddl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Unsigned add long (second part).
  void uaddl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // Unsigned add wide.
  void uaddw(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Unsigned add wide (second part).
  void uaddw2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // Signed add long.
  void saddl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Signed add long (second part).
  void saddl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // Signed add wide.
  void saddw(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Signed add wide (second part).
  void saddw2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // Unsigned subtract long.
  void usubl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Unsigned subtract long (second part).
  void usubl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // Unsigned subtract wide.
  void usubw(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Unsigned subtract wide (second part).
  void usubw2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // Signed subtract long.
  void ssubl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Signed subtract long (second part).
  void ssubl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // Signed integer subtract wide.
  void ssubw(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Signed integer subtract wide (second part).
  void ssubw2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);
3116 
  // Unsigned maximum.
  void umax(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm);

  // Unsigned pairwise maximum.
  void umaxp(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Unsigned maximum across vector.
  void umaxv(const VRegister& vd,
             const VRegister& vn);

  // Unsigned minimum.
  void umin(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm);

  // Unsigned pairwise minimum.
  void uminp(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Unsigned minimum across vector.
  void uminv(const VRegister& vd,
             const VRegister& vn);

  // Transpose vectors (primary, even-indexed elements).
  void trn1(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm);

  // Transpose vectors (secondary, odd-indexed elements).
  void trn2(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm);

  // Unzip vectors (primary, even-indexed elements).
  void uzp1(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm);

  // Unzip vectors (secondary, odd-indexed elements).
  void uzp2(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm);

  // Zip vectors (primary, lower halves).
  void zip1(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm);

  // Zip vectors (secondary, upper halves).
  void zip2(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm);
3174 
  // Signed shift right by immediate.
  void sshr(const VRegister& vd,
            const VRegister& vn,
            int shift);

  // Unsigned shift right by immediate.
  void ushr(const VRegister& vd,
            const VRegister& vn,
            int shift);

  // Signed rounding shift right by immediate.
  void srshr(const VRegister& vd,
             const VRegister& vn,
             int shift);

  // Unsigned rounding shift right by immediate.
  void urshr(const VRegister& vd,
             const VRegister& vn,
             int shift);

  // Signed shift right by immediate and accumulate.
  void ssra(const VRegister& vd,
            const VRegister& vn,
            int shift);

  // Unsigned shift right by immediate and accumulate.
  void usra(const VRegister& vd,
            const VRegister& vn,
            int shift);

  // Signed rounding shift right by immediate and accumulate.
  void srsra(const VRegister& vd,
             const VRegister& vn,
             int shift);

  // Unsigned rounding shift right by immediate and accumulate.
  void ursra(const VRegister& vd,
             const VRegister& vn,
             int shift);

  // Shift right narrow by immediate.
  void shrn(const VRegister& vd,
            const VRegister& vn,
            int shift);

  // Shift right narrow by immediate (second part).
  void shrn2(const VRegister& vd,
             const VRegister& vn,
             int shift);

  // Rounding shift right narrow by immediate.
  void rshrn(const VRegister& vd,
             const VRegister& vn,
             int shift);

  // Rounding shift right narrow by immediate (second part).
  void rshrn2(const VRegister& vd,
              const VRegister& vn,
              int shift);

  // Unsigned saturating shift right narrow by immediate.
  void uqshrn(const VRegister& vd,
              const VRegister& vn,
              int shift);

  // Unsigned saturating shift right narrow by immediate (second part).
  void uqshrn2(const VRegister& vd,
               const VRegister& vn,
               int shift);

  // Unsigned saturating rounding shift right narrow by immediate.
  void uqrshrn(const VRegister& vd,
               const VRegister& vn,
               int shift);

  // Unsigned saturating rounding shift right narrow by immediate (second part).
  void uqrshrn2(const VRegister& vd,
                const VRegister& vn,
                int shift);

  // Signed saturating shift right narrow by immediate.
  void sqshrn(const VRegister& vd,
              const VRegister& vn,
              int shift);

  // Signed saturating shift right narrow by immediate (second part).
  void sqshrn2(const VRegister& vd,
               const VRegister& vn,
               int shift);

  // Signed saturating rounded shift right narrow by immediate.
  void sqrshrn(const VRegister& vd,
               const VRegister& vn,
               int shift);

  // Signed saturating rounded shift right narrow by immediate (second part).
  void sqrshrn2(const VRegister& vd,
                const VRegister& vn,
                int shift);

  // Signed saturating shift right unsigned narrow by immediate.
  void sqshrun(const VRegister& vd,
               const VRegister& vn,
               int shift);

  // Signed saturating shift right unsigned narrow by immediate (second part).
  void sqshrun2(const VRegister& vd,
                const VRegister& vn,
                int shift);

  // Signed saturating rounded shift right unsigned narrow by immediate.
  void sqrshrun(const VRegister& vd,
                const VRegister& vn,
                int shift);

  // Signed saturating rounded shift right unsigned narrow by immediate
  // (second part).
  void sqrshrun2(const VRegister& vd,
                 const VRegister& vn,
                 int shift);
3294 
  // FP reciprocal step (used to refine a reciprocal estimate, e.g. one
  // produced by frecpe).
  void frecps(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // FP reciprocal estimate.
  void frecpe(const VRegister& vd,
              const VRegister& vn);

  // FP reciprocal square root estimate.
  void frsqrte(const VRegister& vd,
               const VRegister& vn);

  // FP reciprocal square root step (used to refine an estimate from frsqrte).
  void frsqrts(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm);
3312 
  // Signed absolute difference and accumulate long (vd accumulates the
  // widened |vn - vm|).
  void sabal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Signed absolute difference and accumulate long (second part).
  void sabal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // Unsigned absolute difference and accumulate long.
  void uabal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Unsigned absolute difference and accumulate long (second part).
  void uabal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // Signed absolute difference long.
  void sabdl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Signed absolute difference long (second part).
  void sabdl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // Unsigned absolute difference long.
  void uabdl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Unsigned absolute difference long (second part).
  void uabdl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);
3352 
  // Polynomial multiply long.
  void pmull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Polynomial multiply long (second part).
  void pmull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // Signed long multiply-add.
  void smlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Signed long multiply-add (second part).
  void smlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // Unsigned long multiply-add.
  void umlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Unsigned long multiply-add (second part).
  void umlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // Signed long multiply-sub.
  void smlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Signed long multiply-sub (second part).
  void smlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // Unsigned long multiply-sub.
  void umlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Unsigned long multiply-sub (second part).
  void umlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // Signed long multiply.
  void smull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Signed long multiply (second part).
  void smull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // Signed saturating doubling long multiply-add.
  void sqdmlal(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm);

  // Signed saturating doubling long multiply-add (second part).
  void sqdmlal2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm);

  // Signed saturating doubling long multiply-subtract.
  void sqdmlsl(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm);

  // Signed saturating doubling long multiply-subtract (second part).
  void sqdmlsl2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm);

  // Signed saturating doubling long multiply.
  void sqdmull(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm);

  // Signed saturating doubling long multiply (second part).
  void sqdmull2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm);

  // Signed saturating doubling multiply returning high half.
  void sqdmulh(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm);

  // Signed saturating rounding doubling multiply returning high half.
  void sqrdmulh(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm);

  // Signed saturating doubling multiply element returning high half. The
  // vm_index form multiplies by a single element of vm.
  void sqdmulh(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating rounding doubling multiply element returning high half.
  void sqrdmulh(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);
3464 
  // Unsigned multiply long.
  void umull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Unsigned multiply long (second part).
  void umull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // Add narrow returning high half.
  void addhn(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Add narrow returning high half (second part).
  void addhn2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // Rounding add narrow returning high half.
  void raddhn(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // Rounding add narrow returning high half (second part).
  void raddhn2(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm);

  // Subtract narrow returning high half.
  void subhn(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // Subtract narrow returning high half (second part).
  void subhn2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // Rounding subtract narrow returning high half.
  void rsubhn(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm);

  // Rounding subtract narrow returning high half (second part).
  void rsubhn2(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm);
3514 
  // FP vector multiply accumulate.
  void fmla(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm);

  // FP vector multiply subtract.
  void fmls(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm);

  // FP vector multiply extended.
  void fmulx(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // FP absolute greater than or equal.
  void facge(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // FP absolute greater than.
  void facgt(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // FP multiply by element (vm_index selects the element of vm).
  void fmul(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

  // FP fused multiply-add to accumulator by element.
  void fmla(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

  // FP fused multiply-sub from accumulator by element.
  void fmls(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

  // FP multiply extended by element.
  void fmulx(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // FP compare equal.
  void fcmeq(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // FP greater than.
  void fcmgt(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // FP greater than or equal.
  void fcmge(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm);

  // FP compare equal to zero (only an immediate of 0.0 is encodable).
  void fcmeq(const VRegister& vd,
             const VRegister& vn,
             double imm);
3583 
3584   // FP greater than zero.
3585   void fcmgt(const VRegister& vd,
3586              const VRegister& vn,
3587              double imm);
3588 
3589   // FP greater than or equal to zero.
3590   void fcmge(const VRegister& vd,
3591              const VRegister& vn,
3592              double imm);
3593 
3594   // FP less than or equal to zero.
3595   void fcmle(const VRegister& vd,
3596              const VRegister& vn,
3597              double imm);
3598 
  // FP less than zero.
3600   void fcmlt(const VRegister& vd,
3601              const VRegister& vn,
3602              double imm);
3603 
3604   // FP absolute difference.
3605   void fabd(const VRegister& vd,
3606             const VRegister& vn,
3607             const VRegister& vm);
3608 
3609   // FP pairwise add vector.
3610   void faddp(const VRegister& vd,
3611              const VRegister& vn,
3612              const VRegister& vm);
3613 
3614   // FP pairwise add scalar.
3615   void faddp(const VRegister& vd,
3616              const VRegister& vn);
3617 
3618   // FP pairwise maximum vector.
3619   void fmaxp(const VRegister& vd,
3620              const VRegister& vn,
3621              const VRegister& vm);
3622 
3623   // FP pairwise maximum scalar.
3624   void fmaxp(const VRegister& vd,
3625              const VRegister& vn);
3626 
3627   // FP pairwise minimum vector.
3628   void fminp(const VRegister& vd,
3629              const VRegister& vn,
3630              const VRegister& vm);
3631 
3632   // FP pairwise minimum scalar.
3633   void fminp(const VRegister& vd,
3634              const VRegister& vn);
3635 
3636   // FP pairwise maximum number vector.
3637   void fmaxnmp(const VRegister& vd,
3638                const VRegister& vn,
3639                const VRegister& vm);
3640 
3641   // FP pairwise maximum number scalar.
3642   void fmaxnmp(const VRegister& vd,
3643                const VRegister& vn);
3644 
3645   // FP pairwise minimum number vector.
3646   void fminnmp(const VRegister& vd,
3647                const VRegister& vn,
3648                const VRegister& vm);
3649 
3650   // FP pairwise minimum number scalar.
3651   void fminnmp(const VRegister& vd,
3652                const VRegister& vn);
3653 
  // Emit generic instructions.
  // Emit raw instructions into the instruction stream.
  void dci(Instr raw_inst) { Emit(raw_inst); }

  // Emit 32 bits of data into the instruction stream.
  void dc32(uint32_t data) {
    // In debug builds the buffer must have been acquired (see AcquireBuffer)
    // before raw data is emitted.
    VIXL_ASSERT(buffer_monitor_ > 0);
    buffer_->Emit32(data);
  }

  // Emit 64 bits of data into the instruction stream.
  void dc64(uint64_t data) {
    VIXL_ASSERT(buffer_monitor_ > 0);
    buffer_->Emit64(data);
  }

  // Copy a string into the instruction stream, including the terminating NULL
  // character. The instruction pointer is then aligned correctly for
  // subsequent instructions.
  void EmitString(const char * string) {
    VIXL_ASSERT(string != NULL);
    VIXL_ASSERT(buffer_monitor_ > 0);

    buffer_->EmitString(string);
    buffer_->Align();
  }
3680 
  // Code generation helpers.

  // Register encoding.
  // Each helper places a register code into the corresponding instruction
  // field. The stack pointer cannot be encoded through these helpers; use
  // RdSP/RnSP for fields that accept sp.
  static Instr Rd(CPURegister rd) {
    VIXL_ASSERT(rd.code() != kSPRegInternalCode);
    return rd.code() << Rd_offset;
  }

  static Instr Rn(CPURegister rn) {
    VIXL_ASSERT(rn.code() != kSPRegInternalCode);
    return rn.code() << Rn_offset;
  }

  static Instr Rm(CPURegister rm) {
    VIXL_ASSERT(rm.code() != kSPRegInternalCode);
    return rm.code() << Rm_offset;
  }

  // As Rm, but additionally disallow the zero register.
  static Instr RmNot31(CPURegister rm) {
    VIXL_ASSERT(rm.code() != kSPRegInternalCode);
    VIXL_ASSERT(!rm.IsZero());
    return Rm(rm);
  }

  static Instr Ra(CPURegister ra) {
    VIXL_ASSERT(ra.code() != kSPRegInternalCode);
    return ra.code() << Ra_offset;
  }

  static Instr Rt(CPURegister rt) {
    VIXL_ASSERT(rt.code() != kSPRegInternalCode);
    return rt.code() << Rt_offset;
  }

  static Instr Rt2(CPURegister rt2) {
    VIXL_ASSERT(rt2.code() != kSPRegInternalCode);
    return rt2.code() << Rt2_offset;
  }

  static Instr Rs(CPURegister rs) {
    VIXL_ASSERT(rs.code() != kSPRegInternalCode);
    return rs.code() << Rs_offset;
  }
3724 
  // These encoding functions allow the stack pointer to be encoded, and
  // disallow the zero register.
  static Instr RdSP(Register rd) {
    VIXL_ASSERT(!rd.IsZero());
    // NOTE(review): masking with kRegCodeMask presumably folds the internal
    // sp register code into the five-bit field -- confirm against the
    // definition of kSPRegInternalCode.
    return (rd.code() & kRegCodeMask) << Rd_offset;
  }

  static Instr RnSP(Register rn) {
    VIXL_ASSERT(!rn.IsZero());
    return (rn.code() & kRegCodeMask) << Rn_offset;
  }
3736 
3737   // Flags encoding.
Flags(FlagsUpdate S)3738   static Instr Flags(FlagsUpdate S) {
3739     if (S == SetFlags) {
3740       return 1 << FlagsUpdate_offset;
3741     } else if (S == LeaveFlags) {
3742       return 0 << FlagsUpdate_offset;
3743     }
3744     VIXL_UNREACHABLE();
3745     return 0;
3746   }
3747 
Cond(Condition cond)3748   static Instr Cond(Condition cond) {
3749     return cond << Condition_offset;
3750   }
3751 
  // PC-relative address encoding.
  static Instr ImmPCRelAddress(int imm21) {
    VIXL_ASSERT(is_int21(imm21));
    Instr imm = static_cast<Instr>(truncate_to_int21(imm21));
    // The 21-bit offset is split across the instruction: the low bits go in
    // the immlo field and the remaining high bits in immhi.
    Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
    Instr immlo = imm << ImmPCRelLo_offset;
    return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask);
  }

  // Branch encoding.
  static Instr ImmUncondBranch(int imm26) {
    VIXL_ASSERT(is_int26(imm26));
    return truncate_to_int26(imm26) << ImmUncondBranch_offset;
  }

  static Instr ImmCondBranch(int imm19) {
    VIXL_ASSERT(is_int19(imm19));
    return truncate_to_int19(imm19) << ImmCondBranch_offset;
  }

  static Instr ImmCmpBranch(int imm19) {
    VIXL_ASSERT(is_int19(imm19));
    return truncate_to_int19(imm19) << ImmCmpBranch_offset;
  }

  static Instr ImmTestBranch(int imm14) {
    VIXL_ASSERT(is_int14(imm14));
    return truncate_to_int14(imm14) << ImmTestBranch_offset;
  }

  // Encode the bit position for a test-and-branch instruction: bit 5 of
  // bit_pos goes in the b5 field, bits 4:0 in the b40 field.
  static Instr ImmTestBranchBit(unsigned bit_pos) {
    VIXL_ASSERT(is_uint6(bit_pos));
    // Subtract five from the shift offset, as we need bit 5 from bit_pos.
    unsigned b5 = bit_pos << (ImmTestBranchBit5_offset - 5);
    unsigned b40 = bit_pos << ImmTestBranchBit40_offset;
    b5 &= ImmTestBranchBit5_mask;
    b40 &= ImmTestBranchBit40_mask;
    return b5 | b40;
  }
3791 
3792   // Data Processing encoding.
SF(Register rd)3793   static Instr SF(Register rd) {
3794       return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits;
3795   }
3796 
ImmAddSub(int64_t imm)3797   static Instr ImmAddSub(int64_t imm) {
3798     VIXL_ASSERT(IsImmAddSub(imm));
3799     if (is_uint12(imm)) {  // No shift required.
3800       return imm << ImmAddSub_offset;
3801     } else {
3802       return ((imm >> 12) << ImmAddSub_offset) | (1 << ShiftAddSub_offset);
3803     }
3804   }
3805 
  // Bitfield 'imms' field: six bits for X-sized operations, five for W-sized.
  static Instr ImmS(unsigned imms, unsigned reg_size) {
    VIXL_ASSERT(((reg_size == kXRegSize) && is_uint6(imms)) ||
           ((reg_size == kWRegSize) && is_uint5(imms)));
    USE(reg_size);
    return imms << ImmS_offset;
  }
3812 
ImmR(unsigned immr,unsigned reg_size)3813   static Instr ImmR(unsigned immr, unsigned reg_size) {
3814     VIXL_ASSERT(((reg_size == kXRegSize) && is_uint6(immr)) ||
3815            ((reg_size == kWRegSize) && is_uint5(immr)));
3816     USE(reg_size);
3817     VIXL_ASSERT(is_uint6(immr));
3818     return immr << ImmR_offset;
3819   }
3820 
  // Logical-immediate 'imms' (set-bits) field.
  static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT(is_uint6(imms));
    // NOTE(review): the (imms + 3) bound further restricts W-sized
    // operations; presumably it mirrors the logical-immediate encoding
    // limits -- confirm against IsImmLogical.
    VIXL_ASSERT((reg_size == kXRegSize) || is_uint6(imms + 3));
    USE(reg_size);
    return imms << ImmSetBits_offset;
  }
3828 
  // Logical-immediate rotate amount ('immr' field).
  static Instr ImmRotate(unsigned immr, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT(((reg_size == kXRegSize) && is_uint6(immr)) ||
           ((reg_size == kWRegSize) && is_uint5(immr)));
    USE(reg_size);
    return immr << ImmRotate_offset;
  }
3836 
  // Load-literal offset field.
  static Instr ImmLLiteral(int imm19) {
    VIXL_ASSERT(is_int19(imm19));
    return truncate_to_int19(imm19) << ImmLLiteral_offset;
  }

  // Logical-immediate 'N' bit. Only X-sized operations may set it.
  static Instr BitN(unsigned bitn, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
    USE(reg_size);
    return bitn << BitN_offset;
  }
3848 
  // Shift type for shifted-register data processing operands.
  static Instr ShiftDP(Shift shift) {
    VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
    return shift << ShiftDP_offset;
  }

  // Shift amount for shifted-register operands.
  static Instr ImmDPShift(unsigned amount) {
    VIXL_ASSERT(is_uint6(amount));
    return amount << ImmDPShift_offset;
  }

  // Extend mode for extended-register operands.
  static Instr ExtendMode(Extend extend) {
    return extend << ExtendMode_offset;
  }

  // Left shift (0-4) applied together with an extended-register operand.
  static Instr ImmExtendShift(unsigned left_shift) {
    VIXL_ASSERT(left_shift <= 4);
    return left_shift << ImmExtendShift_offset;
  }

  // Five-bit immediate for conditional compare instructions.
  static Instr ImmCondCmp(unsigned imm) {
    VIXL_ASSERT(is_uint5(imm));
    return imm << ImmCondCmp_offset;
  }

  // Extract the four condition-flag bits and place them in the nzcv field.
  static Instr Nzcv(StatusFlags nzcv) {
    return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
  }
3876 
  // MemOperand offset encoding.

  // Scaled, unsigned 12-bit offset.
  static Instr ImmLSUnsigned(int imm12) {
    VIXL_ASSERT(is_uint12(imm12));
    return imm12 << ImmLSUnsigned_offset;
  }

  // Unscaled, signed 9-bit offset.
  static Instr ImmLS(int imm9) {
    VIXL_ASSERT(is_int9(imm9));
    return truncate_to_int9(imm9) << ImmLS_offset;
  }

  // Load/store pair offset: must be a multiple of the access size, and the
  // scaled value must fit in a signed seven-bit field.
  static Instr ImmLSPair(int imm7, unsigned access_size) {
    VIXL_ASSERT(((imm7 >> access_size) << access_size) == imm7);
    int scaled_imm7 = imm7 >> access_size;
    VIXL_ASSERT(is_int7(scaled_imm7));
    return truncate_to_int7(scaled_imm7) << ImmLSPair_offset;
  }

  // Single-bit flag selecting a scaled register offset.
  static Instr ImmShiftLS(unsigned shift_amount) {
    VIXL_ASSERT(is_uint1(shift_amount));
    return shift_amount << ImmShiftLS_offset;
  }

  // Prefetch operation field.
  static Instr ImmPrefetchOperation(int imm5) {
    VIXL_ASSERT(is_uint5(imm5));
    return imm5 << ImmPrefetchOperation_offset;
  }
3904 
  // 16-bit payload for exception-generating instructions.
  static Instr ImmException(int imm16) {
    VIXL_ASSERT(is_uint16(imm16));
    return imm16 << ImmException_offset;
  }

  // System register specifier.
  static Instr ImmSystemRegister(int imm15) {
    VIXL_ASSERT(is_uint15(imm15));
    return imm15 << ImmSystemRegister_offset;
  }

  // Hint number for hint instructions.
  static Instr ImmHint(int imm7) {
    VIXL_ASSERT(is_uint7(imm7));
    return imm7 << ImmHint_offset;
  }

  // System instruction CRm / CRn fields.
  static Instr CRm(int imm4) {
    VIXL_ASSERT(is_uint4(imm4));
    return imm4 << CRm_offset;
  }

  static Instr CRn(int imm4) {
    VIXL_ASSERT(is_uint4(imm4));
    return imm4 << CRn_offset;
  }

  // Combined 14-bit system operation field.
  static Instr SysOp(int imm14) {
    VIXL_ASSERT(is_uint14(imm14));
    return imm14 << SysOp_offset;
  }

  static Instr ImmSysOp1(int imm3) {
    VIXL_ASSERT(is_uint3(imm3));
    return imm3 << SysOp1_offset;
  }

  static Instr ImmSysOp2(int imm3) {
    VIXL_ASSERT(is_uint3(imm3));
    return imm3 << SysOp2_offset;
  }

  // Barrier domain and type fields.
  static Instr ImmBarrierDomain(int imm2) {
    VIXL_ASSERT(is_uint2(imm2));
    return imm2 << ImmBarrierDomain_offset;
  }

  static Instr ImmBarrierType(int imm2) {
    VIXL_ASSERT(is_uint2(imm2));
    return imm2 << ImmBarrierType_offset;
  }
3954 
  // Move immediates encoding.

  // 16-bit payload for move-wide instructions.
  static Instr ImmMoveWide(uint64_t imm) {
    VIXL_ASSERT(is_uint16(imm));
    return imm << ImmMoveWide_offset;
  }

  // Two-bit shift selector for move-wide instructions.
  static Instr ShiftMoveWide(int64_t shift) {
    VIXL_ASSERT(is_uint2(shift));
    return shift << ShiftMoveWide_offset;
  }
3965 
  // FP Immediates.
  static Instr ImmFP32(float imm);
  static Instr ImmFP64(double imm);

  // FP register type: FP64 for a D register, FP32 otherwise.
  static Instr FPType(FPRegister fd) {
    return fd.Is64Bits() ? FP64 : FP32;
  }

  // Six-bit scale field.
  static Instr FPScale(unsigned scale) {
    VIXL_ASSERT(is_uint6(scale));
    return scale << FPScale_offset;
  }
3979 
3980   // Immediate field checking helpers.
3981   static bool IsImmAddSub(int64_t immediate);
3982   static bool IsImmConditionalCompare(int64_t immediate);
3983   static bool IsImmFP32(float imm);
3984   static bool IsImmFP64(double imm);
3985   static bool IsImmLogical(uint64_t value,
3986                            unsigned width,
3987                            unsigned* n = NULL,
3988                            unsigned* imm_s = NULL,
3989                            unsigned* imm_r = NULL);
3990   static bool IsImmLSPair(int64_t offset, unsigned access_size);
3991   static bool IsImmLSScaled(int64_t offset, unsigned access_size);
3992   static bool IsImmLSUnscaled(int64_t offset);
3993   static bool IsImmMovn(uint64_t imm, unsigned reg_size);
3994   static bool IsImmMovz(uint64_t imm, unsigned reg_size);
3995 
3996   // Instruction bits for vector format in data processing operations.
VFormat(VRegister vd)3997   static Instr VFormat(VRegister vd) {
3998     if (vd.Is64Bits()) {
3999       switch (vd.lanes()) {
4000         case 2: return NEON_2S;
4001         case 4: return NEON_4H;
4002         case 8: return NEON_8B;
4003         default: return 0xffffffff;
4004       }
4005     } else {
4006       VIXL_ASSERT(vd.Is128Bits());
4007       switch (vd.lanes()) {
4008         case 2: return NEON_2D;
4009         case 4: return NEON_4S;
4010         case 8: return NEON_8H;
4011         case 16: return NEON_16B;
4012         default: return 0xffffffff;
4013       }
4014     }
4015   }
4016 
4017   // Instruction bits for vector format in floating point data processing
4018   // operations.
FPFormat(VRegister vd)4019   static Instr FPFormat(VRegister vd) {
4020     if (vd.lanes() == 1) {
4021       // Floating point scalar formats.
4022       VIXL_ASSERT(vd.Is32Bits() || vd.Is64Bits());
4023       return vd.Is64Bits() ? FP64 : FP32;
4024     }
4025 
4026     // Two lane floating point vector formats.
4027     if (vd.lanes() == 2) {
4028       VIXL_ASSERT(vd.Is64Bits() || vd.Is128Bits());
4029       return vd.Is128Bits() ? NEON_FP_2D : NEON_FP_2S;
4030     }
4031 
4032     // Four lane floating point vector format.
4033     VIXL_ASSERT((vd.lanes() == 4) && vd.Is128Bits());
4034     return NEON_FP_4S;
4035   }
4036 
4037   // Instruction bits for vector format in load and store operations.
LSVFormat(VRegister vd)4038   static Instr LSVFormat(VRegister vd) {
4039     if (vd.Is64Bits()) {
4040       switch (vd.lanes()) {
4041         case 1: return LS_NEON_1D;
4042         case 2: return LS_NEON_2S;
4043         case 4: return LS_NEON_4H;
4044         case 8: return LS_NEON_8B;
4045         default: return 0xffffffff;
4046       }
4047     } else {
4048       VIXL_ASSERT(vd.Is128Bits());
4049       switch (vd.lanes()) {
4050         case 2: return LS_NEON_2D;
4051         case 4: return LS_NEON_4S;
4052         case 8: return LS_NEON_8H;
4053         case 16: return LS_NEON_16B;
4054         default: return 0xffffffff;
4055       }
4056     }
4057   }
4058 
  // Instruction bits for scalar format in data processing operations.
  // The single lane's size in bytes selects the B, H, S or D format.
  static Instr SFormat(VRegister vd) {
    VIXL_ASSERT(vd.lanes() == 1);
    switch (vd.SizeInBytes()) {
      case 1: return NEON_B;
      case 2: return NEON_H;
      case 4: return NEON_S;
      case 8: return NEON_D;
      default: return 0xffffffff;
    }
  }
4070 
ImmNEONHLM(int index,int num_bits)4071   static Instr ImmNEONHLM(int index, int num_bits) {
4072     int h, l, m;
4073     if (num_bits == 3) {
4074       VIXL_ASSERT(is_uint3(index));
4075       h  = (index >> 2) & 1;
4076       l  = (index >> 1) & 1;
4077       m  = (index >> 0) & 1;
4078     } else if (num_bits == 2) {
4079       VIXL_ASSERT(is_uint2(index));
4080       h  = (index >> 1) & 1;
4081       l  = (index >> 0) & 1;
4082       m  = 0;
4083     } else {
4084       VIXL_ASSERT(is_uint1(index) && (num_bits == 1));
4085       h  = (index >> 0) & 1;
4086       l  = 0;
4087       m  = 0;
4088     }
4089     return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
4090   }
4091 
  // Four-bit immediate field for NEON ext.
  static Instr ImmNEONExt(int imm4) {
    VIXL_ASSERT(is_uint4(imm4));
    return imm4 << ImmNEONExt_offset;
  }

  // Five-bit lane specifier: a one bit marks the lane size, with the index
  // packed in the bits above it.
  static Instr ImmNEON5(Instr format, int index) {
    VIXL_ASSERT(is_uint4(index));
    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
    int imm5 = (index << (s + 1)) | (1 << s);
    return imm5 << ImmNEON5_offset;
  }

  // Four-bit lane specifier: the index shifted by the lane-size log2.
  static Instr ImmNEON4(Instr format, int index) {
    VIXL_ASSERT(is_uint4(index));
    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
    int imm4 = index << s;
    return imm4 << ImmNEON4_offset;
  }

  // Split an eight-bit modified immediate across the abc and defgh fields.
  static Instr ImmNEONabcdefgh(int imm8) {
    VIXL_ASSERT(is_uint8(imm8));
    Instr instr;
    instr  = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
    instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
    return instr;
  }

  // 'cmode' field for NEON modified-immediate instructions.
  static Instr NEONCmode(int cmode) {
    VIXL_ASSERT(is_uint4(cmode));
    return cmode << NEONCmode_offset;
  }

  // 'op' bit for NEON modified-immediate instructions.
  static Instr NEONModImmOp(int op) {
    VIXL_ASSERT(is_uint1(op));
    return op << NEONModImmOp_offset;
  }
4128 
  // Size of the code generated since label to the current position.
  size_t SizeOfCodeGeneratedSince(Label* label) const {
    VIXL_ASSERT(label->IsBound());
    return buffer_->OffsetFrom(label->location());
  }

  // Total number of bytes emitted so far.
  size_t SizeOfCodeGenerated() const {
    return buffer_->CursorOffset();
  }

  // Current capacity of the underlying code buffer, in bytes.
  size_t BufferCapacity() const { return buffer_->capacity(); }

  // Bytes still available before the buffer would need to grow.
  size_t RemainingBufferSpace() const { return buffer_->RemainingBytes(); }
4142 
EnsureSpaceFor(size_t amount)4143   void EnsureSpaceFor(size_t amount) {
4144     if (buffer_->RemainingBytes() < amount) {
4145       size_t capacity = buffer_->capacity();
4146       size_t size = buffer_->CursorOffset();
4147       do {
4148         // TODO(all): refine.
4149         capacity *= 2;
4150       } while ((capacity - size) <  amount);
4151       buffer_->Grow(capacity);
4152     }
4153   }
4154 
#ifdef VIXL_DEBUG
  // Debug-only reference count of code-buffer users; the raw emission
  // helpers (dc32, dc64, EmitString) assert that it is positive.
  void AcquireBuffer() {
    VIXL_ASSERT(buffer_monitor_ >= 0);
    buffer_monitor_++;
  }

  void ReleaseBuffer() {
    buffer_monitor_--;
    VIXL_ASSERT(buffer_monitor_ >= 0);
  }
#endif
4166 
  // The position-independence policy this assembler operates under.
  PositionIndependentCodeOption pic() const {
    return pic_;
  }

  // Whether code whose correctness depends on page offsets may be generated
  // under the current policy.
  bool AllowPageOffsetDependentCode() const {
    return (pic() == PageOffsetDependentCode) ||
           (pic() == PositionDependentCode);
  }

  // The zero register (wzr or xzr) matching the size of `reg`.
  static const Register& AppropriateZeroRegFor(const CPURegister& reg) {
    return reg.Is64Bits() ? xzr : wzr;
  }
4179 
4180 
4181  protected:
4182   void LoadStore(const CPURegister& rt,
4183                  const MemOperand& addr,
4184                  LoadStoreOp op,
4185                  LoadStoreScalingOption option = PreferScaledOffset);
4186 
4187   void LoadStorePair(const CPURegister& rt,
4188                      const CPURegister& rt2,
4189                      const MemOperand& addr,
4190                      LoadStorePairOp op);
4191   void LoadStoreStruct(const VRegister& vt,
4192                        const MemOperand& addr,
4193                        NEONLoadStoreMultiStructOp op);
4194   void LoadStoreStruct1(const VRegister& vt,
4195                         int reg_count,
4196                         const MemOperand& addr);
4197   void LoadStoreStructSingle(const VRegister& vt,
4198                              uint32_t lane,
4199                              const MemOperand& addr,
4200                              NEONLoadStoreSingleStructOp op);
4201   void LoadStoreStructSingleAllLanes(const VRegister& vt,
4202                                      const MemOperand& addr,
4203                                      NEONLoadStoreSingleStructOp op);
4204   void LoadStoreStructVerify(const VRegister& vt,
4205                              const MemOperand& addr,
4206                              Instr op);
4207 
4208   void Prefetch(PrefetchOperation op,
4209                 const MemOperand& addr,
4210                 LoadStoreScalingOption option = PreferScaledOffset);
4211 
4212   // TODO(all): The third parameter should be passed by reference but gcc 4.8.2
4213   // reports a bogus uninitialised warning then.
4214   void Logical(const Register& rd,
4215                const Register& rn,
4216                const Operand operand,
4217                LogicalOp op);
4218   void LogicalImmediate(const Register& rd,
4219                         const Register& rn,
4220                         unsigned n,
4221                         unsigned imm_s,
4222                         unsigned imm_r,
4223                         LogicalOp op);
4224 
4225   void ConditionalCompare(const Register& rn,
4226                           const Operand& operand,
4227                           StatusFlags nzcv,
4228                           Condition cond,
4229                           ConditionalCompareOp op);
4230 
4231   void AddSubWithCarry(const Register& rd,
4232                        const Register& rn,
4233                        const Operand& operand,
4234                        FlagsUpdate S,
4235                        AddSubWithCarryOp op);
4236 
4237 
4238   // Functions for emulating operands not directly supported by the instruction
4239   // set.
4240   void EmitShift(const Register& rd,
4241                  const Register& rn,
4242                  Shift shift,
4243                  unsigned amount);
4244   void EmitExtendShift(const Register& rd,
4245                        const Register& rn,
4246                        Extend extend,
4247                        unsigned left_shift);
4248 
4249   void AddSub(const Register& rd,
4250               const Register& rn,
4251               const Operand& operand,
4252               FlagsUpdate S,
4253               AddSubOp op);
4254 
4255   void NEONTable(const VRegister& vd,
4256                  const VRegister& vn,
4257                  const VRegister& vm,
4258                  NEONTableOp op);
4259 
4260   // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
4261   // registers. Only simple loads are supported; sign- and zero-extension (such
4262   // as in LDPSW_x or LDRB_w) are not supported.
4263   static LoadStoreOp LoadOpFor(const CPURegister& rt);
4264   static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
4265                                        const CPURegister& rt2);
4266   static LoadStoreOp StoreOpFor(const CPURegister& rt);
4267   static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
4268                                         const CPURegister& rt2);
4269   static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
4270     const CPURegister& rt, const CPURegister& rt2);
4271   static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
4272     const CPURegister& rt, const CPURegister& rt2);
4273   static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);
4274 
4275 
4276  private:
4277   static uint32_t FP32ToImm8(float imm);
4278   static uint32_t FP64ToImm8(double imm);
4279 
4280   // Instruction helpers.
4281   void MoveWide(const Register& rd,
4282                 uint64_t imm,
4283                 int shift,
4284                 MoveWideImmediateOp mov_op);
4285   void DataProcShiftedRegister(const Register& rd,
4286                                const Register& rn,
4287                                const Operand& operand,
4288                                FlagsUpdate S,
4289                                Instr op);
4290   void DataProcExtendedRegister(const Register& rd,
4291                                 const Register& rn,
4292                                 const Operand& operand,
4293                                 FlagsUpdate S,
4294                                 Instr op);
4295   void LoadStorePairNonTemporal(const CPURegister& rt,
4296                                 const CPURegister& rt2,
4297                                 const MemOperand& addr,
4298                                 LoadStorePairNonTemporalOp op);
4299   void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
4300   void ConditionalSelect(const Register& rd,
4301                          const Register& rn,
4302                          const Register& rm,
4303                          Condition cond,
4304                          ConditionalSelectOp op);
4305   void DataProcessing1Source(const Register& rd,
4306                              const Register& rn,
4307                              DataProcessing1SourceOp op);
4308   void DataProcessing3Source(const Register& rd,
4309                              const Register& rn,
4310                              const Register& rm,
4311                              const Register& ra,
4312                              DataProcessing3SourceOp op);
4313   void FPDataProcessing1Source(const VRegister& fd,
4314                                const VRegister& fn,
4315                                FPDataProcessing1SourceOp op);
4316   void FPDataProcessing3Source(const VRegister& fd,
4317                                const VRegister& fn,
4318                                const VRegister& fm,
4319                                const VRegister& fa,
4320                                FPDataProcessing3SourceOp op);
4321   void NEONAcrossLanesL(const VRegister& vd,
4322                         const VRegister& vn,
4323                         NEONAcrossLanesOp op);
4324   void NEONAcrossLanes(const VRegister& vd,
4325                        const VRegister& vn,
4326                        NEONAcrossLanesOp op);
4327   void NEONModifiedImmShiftLsl(const VRegister& vd,
4328                                const int imm8,
4329                                const int left_shift,
4330                                NEONModifiedImmediateOp op);
4331   void NEONModifiedImmShiftMsl(const VRegister& vd,
4332                                const int imm8,
4333                                const int shift_amount,
4334                                NEONModifiedImmediateOp op);
4335   void NEONFP2Same(const VRegister& vd,
4336                    const VRegister& vn,
4337                    Instr vop);
4338   void NEON3Same(const VRegister& vd,
4339                  const VRegister& vn,
4340                  const VRegister& vm,
4341                  NEON3SameOp vop);
4342   void NEONFP3Same(const VRegister& vd,
4343                    const VRegister& vn,
4344                    const VRegister& vm,
4345                    Instr op);
4346   void NEON3DifferentL(const VRegister& vd,
4347                        const VRegister& vn,
4348                        const VRegister& vm,
4349                        NEON3DifferentOp vop);
4350   void NEON3DifferentW(const VRegister& vd,
4351                        const VRegister& vn,
4352                        const VRegister& vm,
4353                        NEON3DifferentOp vop);
4354   void NEON3DifferentHN(const VRegister& vd,
4355                         const VRegister& vn,
4356                         const VRegister& vm,
4357                         NEON3DifferentOp vop);
4358   void NEONFP2RegMisc(const VRegister& vd,
4359                       const VRegister& vn,
4360                       NEON2RegMiscOp vop,
4361                       double value = 0.0);
4362   void NEON2RegMisc(const VRegister& vd,
4363                     const VRegister& vn,
4364                     NEON2RegMiscOp vop,
4365                     int value = 0);
4366   void NEONFP2RegMisc(const VRegister& vd,
4367                       const VRegister& vn,
4368                       Instr op);
4369   void NEONAddlp(const VRegister& vd,
4370                  const VRegister& vn,
4371                  NEON2RegMiscOp op);
4372   void NEONPerm(const VRegister& vd,
4373                 const VRegister& vn,
4374                 const VRegister& vm,
4375                 NEONPermOp op);
4376   void NEONFPByElement(const VRegister& vd,
4377                        const VRegister& vn,
4378                        const VRegister& vm,
4379                        int vm_index,
4380                        NEONByIndexedElementOp op);
4381   void NEONByElement(const VRegister& vd,
4382                      const VRegister& vn,
4383                      const VRegister& vm,
4384                      int vm_index,
4385                      NEONByIndexedElementOp op);
4386   void NEONByElementL(const VRegister& vd,
4387                       const VRegister& vn,
4388                       const VRegister& vm,
4389                       int vm_index,
4390                       NEONByIndexedElementOp op);
4391   void NEONShiftImmediate(const VRegister& vd,
4392                           const VRegister& vn,
4393                           NEONShiftImmediateOp op,
4394                           int immh_immb);
4395   void NEONShiftLeftImmediate(const VRegister& vd,
4396                               const VRegister& vn,
4397                               int shift,
4398                               NEONShiftImmediateOp op);
4399   void NEONShiftRightImmediate(const VRegister& vd,
4400                                const VRegister& vn,
4401                                int shift,
4402                                NEONShiftImmediateOp op);
4403   void NEONShiftImmediateL(const VRegister& vd,
4404                            const VRegister& vn,
4405                            int shift,
4406                            NEONShiftImmediateOp op);
4407   void NEONShiftImmediateN(const VRegister& vd,
4408                            const VRegister& vn,
4409                            int shift,
4410                            NEONShiftImmediateOp op);
4411   void NEONXtn(const VRegister& vd,
4412                const VRegister& vn,
4413                NEON2RegMiscOp vop);
4414 
4415   Instr LoadStoreStructAddrModeField(const MemOperand& addr);
4416 
4417   // Encode the specified MemOperand for the specified access size and scaling
4418   // preference.
4419   Instr LoadStoreMemOperand(const MemOperand& addr,
4420                             unsigned access_size,
4421                             LoadStoreScalingOption option);
4422 
4423   // Link the current (not-yet-emitted) instruction to the specified label, then
4424   // return an offset to be encoded in the instruction. If the label is not yet
4425   // bound, an offset of 0 is returned.
4426   ptrdiff_t LinkAndGetByteOffsetTo(Label * label);
4427   ptrdiff_t LinkAndGetInstructionOffsetTo(Label * label);
4428   ptrdiff_t LinkAndGetPageOffsetTo(Label * label);
4429 
4430   // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
4431   template <int element_shift>
4432   ptrdiff_t LinkAndGetOffsetTo(Label* label);
4433 
4434   // Literal load offset are in words (32-bit).
4435   ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);
4436 
4437   // Emit the instruction in buffer_.
Emit(Instr instruction)4438   void Emit(Instr instruction) {
4439     VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
4440     VIXL_ASSERT(buffer_monitor_ > 0);
4441     buffer_->Emit32(instruction);
4442   }
4443 
4444   // Buffer where the code is emitted.
4445   CodeBuffer* buffer_;
4446   PositionIndependentCodeOption pic_;
4447 
4448 #ifdef VIXL_DEBUG
4449   int64_t buffer_monitor_;
4450 #endif
4451 };
4452 
4453 
4454 // All Assembler emits MUST acquire/release the underlying code buffer. The
4455 // helper scope below will do so and optionally ensure the buffer is big enough
4456 // to receive the emit. It is possible to request the scope not to perform any
4457 // checks (kNoCheck) if for example it is known in advance the buffer size is
4458 // adequate or there is some other size checking mechanism in place.
4459 class CodeBufferCheckScope {
4460  public:
4461   // Tell whether or not the scope needs to ensure the associated CodeBuffer
4462   // has enough space for the requested size.
4463   enum CheckPolicy {
4464     kNoCheck,
4465     kCheck
4466   };
4467 
4468   // Tell whether or not the scope should assert the amount of code emitted
4469   // within the scope is consistent with the requested amount.
4470   enum AssertPolicy {
4471     kNoAssert,    // No assert required.
4472     kExactSize,   // The code emitted must be exactly size bytes.
4473     kMaximumSize  // The code emitted must be at most size bytes.
4474   };
4475 
4476   CodeBufferCheckScope(Assembler* assm,
4477                        size_t size,
4478                        CheckPolicy check_policy = kCheck,
4479                        AssertPolicy assert_policy = kMaximumSize)
assm_(assm)4480       : assm_(assm) {
4481     if (check_policy == kCheck) assm->EnsureSpaceFor(size);
4482 #ifdef VIXL_DEBUG
4483     assm->bind(&start_);
4484     size_ = size;
4485     assert_policy_ = assert_policy;
4486     assm->AcquireBuffer();
4487 #else
4488     USE(assert_policy);
4489 #endif
4490   }
4491 
4492   // This is a shortcut for CodeBufferCheckScope(assm, 0, kNoCheck, kNoAssert).
CodeBufferCheckScope(Assembler * assm)4493   explicit CodeBufferCheckScope(Assembler* assm) : assm_(assm) {
4494 #ifdef VIXL_DEBUG
4495     size_ = 0;
4496     assert_policy_ = kNoAssert;
4497     assm->AcquireBuffer();
4498 #endif
4499   }
4500 
~CodeBufferCheckScope()4501   ~CodeBufferCheckScope() {
4502 #ifdef VIXL_DEBUG
4503     assm_->ReleaseBuffer();
4504     switch (assert_policy_) {
4505       case kNoAssert: break;
4506       case kExactSize:
4507         VIXL_ASSERT(assm_->SizeOfCodeGeneratedSince(&start_) == size_);
4508         break;
4509       case kMaximumSize:
4510         VIXL_ASSERT(assm_->SizeOfCodeGeneratedSince(&start_) <= size_);
4511         break;
4512       default:
4513         VIXL_UNREACHABLE();
4514     }
4515 #endif
4516   }
4517 
4518  protected:
4519   Assembler* assm_;
4520 #ifdef VIXL_DEBUG
4521   Label start_;
4522   size_t size_;
4523   AssertPolicy assert_policy_;
4524 #endif
4525 };
4526 
4527 }  // namespace vixl
4528 
4529 #endif  // VIXL_A64_ASSEMBLER_A64_H_
4530