1 // Copyright 2015, ARM Limited
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #ifndef VIXL_A64_ASSEMBLER_A64_H_
28 #define VIXL_A64_ASSEMBLER_A64_H_
29
30
31 #include "vixl/globals.h"
32 #include "vixl/invalset.h"
33 #include "vixl/utils.h"
34 #include "vixl/code-buffer.h"
35 #include "vixl/a64/instructions-a64.h"
36
37 namespace vixl {
38
// Bit set of registers: bit N is set when the register with code N is present
// (see CPURegister::Bit()).
typedef uint64_t RegList;
// Number of bits, and therefore of distinct register codes, in a RegList.
static const int kRegListSizeInBits = sizeof(RegList) * 8;
41
42
43 // Registers.
44
45 // Some CPURegister methods can return Register or VRegister types, so we need
46 // to declare them in advance.
47 class Register;
48 class VRegister;
49
50 class CPURegister {
51 public:
52 enum RegisterType {
53 // The kInvalid value is used to detect uninitialized static instances,
54 // which are always zero-initialized before any constructors are called.
55 kInvalid = 0,
56 kRegister,
57 kVRegister,
58 kFPRegister = kVRegister,
59 kNoRegister
60 };
61
CPURegister()62 CPURegister() : code_(0), size_(0), type_(kNoRegister) {
63 VIXL_ASSERT(!IsValid());
64 VIXL_ASSERT(IsNone());
65 }
66
CPURegister(unsigned code,unsigned size,RegisterType type)67 CPURegister(unsigned code, unsigned size, RegisterType type)
68 : code_(code), size_(size), type_(type) {
69 VIXL_ASSERT(IsValidOrNone());
70 }
71
code()72 unsigned code() const {
73 VIXL_ASSERT(IsValid());
74 return code_;
75 }
76
type()77 RegisterType type() const {
78 VIXL_ASSERT(IsValidOrNone());
79 return type_;
80 }
81
Bit()82 RegList Bit() const {
83 VIXL_ASSERT(code_ < (sizeof(RegList) * 8));
84 return IsValid() ? (static_cast<RegList>(1) << code_) : 0;
85 }
86
size()87 unsigned size() const {
88 VIXL_ASSERT(IsValid());
89 return size_;
90 }
91
SizeInBytes()92 int SizeInBytes() const {
93 VIXL_ASSERT(IsValid());
94 VIXL_ASSERT(size() % 8 == 0);
95 return size_ / 8;
96 }
97
SizeInBits()98 int SizeInBits() const {
99 VIXL_ASSERT(IsValid());
100 return size_;
101 }
102
Is8Bits()103 bool Is8Bits() const {
104 VIXL_ASSERT(IsValid());
105 return size_ == 8;
106 }
107
Is16Bits()108 bool Is16Bits() const {
109 VIXL_ASSERT(IsValid());
110 return size_ == 16;
111 }
112
Is32Bits()113 bool Is32Bits() const {
114 VIXL_ASSERT(IsValid());
115 return size_ == 32;
116 }
117
Is64Bits()118 bool Is64Bits() const {
119 VIXL_ASSERT(IsValid());
120 return size_ == 64;
121 }
122
Is128Bits()123 bool Is128Bits() const {
124 VIXL_ASSERT(IsValid());
125 return size_ == 128;
126 }
127
IsValid()128 bool IsValid() const {
129 if (IsValidRegister() || IsValidVRegister()) {
130 VIXL_ASSERT(!IsNone());
131 return true;
132 } else {
133 VIXL_ASSERT(IsNone());
134 return false;
135 }
136 }
137
IsValidRegister()138 bool IsValidRegister() const {
139 return IsRegister() &&
140 ((size_ == kWRegSize) || (size_ == kXRegSize)) &&
141 ((code_ < kNumberOfRegisters) || (code_ == kSPRegInternalCode));
142 }
143
IsValidVRegister()144 bool IsValidVRegister() const {
145 return IsVRegister() &&
146 ((size_ == kBRegSize) || (size_ == kHRegSize) ||
147 (size_ == kSRegSize) || (size_ == kDRegSize) ||
148 (size_ == kQRegSize)) &&
149 (code_ < kNumberOfVRegisters);
150 }
151
IsValidFPRegister()152 bool IsValidFPRegister() const {
153 return IsFPRegister() && (code_ < kNumberOfVRegisters);
154 }
155
IsNone()156 bool IsNone() const {
157 // kNoRegister types should always have size 0 and code 0.
158 VIXL_ASSERT((type_ != kNoRegister) || (code_ == 0));
159 VIXL_ASSERT((type_ != kNoRegister) || (size_ == 0));
160
161 return type_ == kNoRegister;
162 }
163
Aliases(const CPURegister & other)164 bool Aliases(const CPURegister& other) const {
165 VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone());
166 return (code_ == other.code_) && (type_ == other.type_);
167 }
168
Is(const CPURegister & other)169 bool Is(const CPURegister& other) const {
170 VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone());
171 return Aliases(other) && (size_ == other.size_);
172 }
173
IsZero()174 bool IsZero() const {
175 VIXL_ASSERT(IsValid());
176 return IsRegister() && (code_ == kZeroRegCode);
177 }
178
IsSP()179 bool IsSP() const {
180 VIXL_ASSERT(IsValid());
181 return IsRegister() && (code_ == kSPRegInternalCode);
182 }
183
IsRegister()184 bool IsRegister() const {
185 return type_ == kRegister;
186 }
187
IsVRegister()188 bool IsVRegister() const {
189 return type_ == kVRegister;
190 }
191
IsFPRegister()192 bool IsFPRegister() const {
193 return IsS() || IsD();
194 }
195
IsW()196 bool IsW() const { return IsValidRegister() && Is32Bits(); }
IsX()197 bool IsX() const { return IsValidRegister() && Is64Bits(); }
198
199 // These assertions ensure that the size and type of the register are as
200 // described. They do not consider the number of lanes that make up a vector.
201 // So, for example, Is8B() implies IsD(), and Is1D() implies IsD, but IsD()
202 // does not imply Is1D() or Is8B().
203 // Check the number of lanes, ie. the format of the vector, using methods such
204 // as Is8B(), Is1D(), etc. in the VRegister class.
IsV()205 bool IsV() const { return IsVRegister(); }
IsB()206 bool IsB() const { return IsV() && Is8Bits(); }
IsH()207 bool IsH() const { return IsV() && Is16Bits(); }
IsS()208 bool IsS() const { return IsV() && Is32Bits(); }
IsD()209 bool IsD() const { return IsV() && Is64Bits(); }
IsQ()210 bool IsQ() const { return IsV() && Is128Bits(); }
211
212 const Register& W() const;
213 const Register& X() const;
214 const VRegister& V() const;
215 const VRegister& B() const;
216 const VRegister& H() const;
217 const VRegister& S() const;
218 const VRegister& D() const;
219 const VRegister& Q() const;
220
IsSameSizeAndType(const CPURegister & other)221 bool IsSameSizeAndType(const CPURegister& other) const {
222 return (size_ == other.size_) && (type_ == other.type_);
223 }
224
225 protected:
226 unsigned code_;
227 unsigned size_;
228 RegisterType type_;
229
230 private:
IsValidOrNone()231 bool IsValidOrNone() const {
232 return IsValid() || IsNone();
233 }
234 };
235
236
237 class Register : public CPURegister {
238 public:
Register()239 Register() : CPURegister() {}
Register(const CPURegister & other)240 explicit Register(const CPURegister& other)
241 : CPURegister(other.code(), other.size(), other.type()) {
242 VIXL_ASSERT(IsValidRegister());
243 }
Register(unsigned code,unsigned size)244 Register(unsigned code, unsigned size)
245 : CPURegister(code, size, kRegister) {}
246
IsValid()247 bool IsValid() const {
248 VIXL_ASSERT(IsRegister() || IsNone());
249 return IsValidRegister();
250 }
251
252 static const Register& WRegFromCode(unsigned code);
253 static const Register& XRegFromCode(unsigned code);
254
255 private:
256 static const Register wregisters[];
257 static const Register xregisters[];
258 };
259
260
261 class VRegister : public CPURegister {
262 public:
VRegister()263 VRegister() : CPURegister(), lanes_(1) {}
VRegister(const CPURegister & other)264 explicit VRegister(const CPURegister& other)
265 : CPURegister(other.code(), other.size(), other.type()), lanes_(1) {
266 VIXL_ASSERT(IsValidVRegister());
267 VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
268 }
269 VRegister(unsigned code, unsigned size, unsigned lanes = 1)
CPURegister(code,size,kVRegister)270 : CPURegister(code, size, kVRegister), lanes_(lanes) {
271 VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
272 }
VRegister(unsigned code,VectorFormat format)273 VRegister(unsigned code, VectorFormat format)
274 : CPURegister(code, RegisterSizeInBitsFromFormat(format), kVRegister),
275 lanes_(IsVectorFormat(format) ? LaneCountFromFormat(format) : 1) {
276 VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
277 }
278
IsValid()279 bool IsValid() const {
280 VIXL_ASSERT(IsVRegister() || IsNone());
281 return IsValidVRegister();
282 }
283
284 static const VRegister& BRegFromCode(unsigned code);
285 static const VRegister& HRegFromCode(unsigned code);
286 static const VRegister& SRegFromCode(unsigned code);
287 static const VRegister& DRegFromCode(unsigned code);
288 static const VRegister& QRegFromCode(unsigned code);
289 static const VRegister& VRegFromCode(unsigned code);
290
V8B()291 VRegister V8B() const { return VRegister(code_, kDRegSize, 8); }
V16B()292 VRegister V16B() const { return VRegister(code_, kQRegSize, 16); }
V4H()293 VRegister V4H() const { return VRegister(code_, kDRegSize, 4); }
V8H()294 VRegister V8H() const { return VRegister(code_, kQRegSize, 8); }
V2S()295 VRegister V2S() const { return VRegister(code_, kDRegSize, 2); }
V4S()296 VRegister V4S() const { return VRegister(code_, kQRegSize, 4); }
V2D()297 VRegister V2D() const { return VRegister(code_, kQRegSize, 2); }
V1D()298 VRegister V1D() const { return VRegister(code_, kDRegSize, 1); }
299
Is8B()300 bool Is8B() const { return (Is64Bits() && (lanes_ == 8)); }
Is16B()301 bool Is16B() const { return (Is128Bits() && (lanes_ == 16)); }
Is4H()302 bool Is4H() const { return (Is64Bits() && (lanes_ == 4)); }
Is8H()303 bool Is8H() const { return (Is128Bits() && (lanes_ == 8)); }
Is2S()304 bool Is2S() const { return (Is64Bits() && (lanes_ == 2)); }
Is4S()305 bool Is4S() const { return (Is128Bits() && (lanes_ == 4)); }
Is1D()306 bool Is1D() const { return (Is64Bits() && (lanes_ == 1)); }
Is2D()307 bool Is2D() const { return (Is128Bits() && (lanes_ == 2)); }
308
309 // For consistency, we assert the number of lanes of these scalar registers,
310 // even though there are no vectors of equivalent total size with which they
311 // could alias.
Is1B()312 bool Is1B() const {
313 VIXL_ASSERT(!(Is8Bits() && IsVector()));
314 return Is8Bits();
315 }
Is1H()316 bool Is1H() const {
317 VIXL_ASSERT(!(Is16Bits() && IsVector()));
318 return Is16Bits();
319 }
Is1S()320 bool Is1S() const {
321 VIXL_ASSERT(!(Is32Bits() && IsVector()));
322 return Is32Bits();
323 }
324
IsLaneSizeB()325 bool IsLaneSizeB() const { return LaneSizeInBits() == kBRegSize; }
IsLaneSizeH()326 bool IsLaneSizeH() const { return LaneSizeInBits() == kHRegSize; }
IsLaneSizeS()327 bool IsLaneSizeS() const { return LaneSizeInBits() == kSRegSize; }
IsLaneSizeD()328 bool IsLaneSizeD() const { return LaneSizeInBits() == kDRegSize; }
329
lanes()330 int lanes() const {
331 return lanes_;
332 }
333
IsScalar()334 bool IsScalar() const {
335 return lanes_ == 1;
336 }
337
IsVector()338 bool IsVector() const {
339 return lanes_ > 1;
340 }
341
IsSameFormat(const VRegister & other)342 bool IsSameFormat(const VRegister& other) const {
343 return (size_ == other.size_) && (lanes_ == other.lanes_);
344 }
345
LaneSizeInBytes()346 unsigned LaneSizeInBytes() const {
347 return SizeInBytes() / lanes_;
348 }
349
LaneSizeInBits()350 unsigned LaneSizeInBits() const {
351 return LaneSizeInBytes() * 8;
352 }
353
354 private:
355 static const VRegister bregisters[];
356 static const VRegister hregisters[];
357 static const VRegister sregisters[];
358 static const VRegister dregisters[];
359 static const VRegister qregisters[];
360 static const VRegister vregisters[];
361 int lanes_;
362 };
363
364
365 // Backward compatibility for FPRegisters.
typedef VRegister FPRegister;

// No*Reg is used to indicate an unused argument, or an error case. Note that
// these all compare equal (using the Is() method). The Register and VRegister
// variants are provided for convenience.
const Register NoReg;
const VRegister NoVReg;
const FPRegister NoFPReg;  // For backward compatibility.
const CPURegister NoCPUReg;


// Define the W-sized and X-sized general-purpose registers w<N> and x<N> for
// every register code N listed by REGISTER_CODE_LIST.
#define DEFINE_REGISTERS(N)  \
const Register w##N(N, kWRegSize);  \
const Register x##N(N, kXRegSize);
REGISTER_CODE_LIST(DEFINE_REGISTERS)
#undef DEFINE_REGISTERS
// The stack pointer is identified by the dedicated code kSPRegInternalCode.
const Register wsp(kSPRegInternalCode, kWRegSize);
const Register sp(kSPRegInternalCode, kXRegSize);


// Define the B-, H-, S-, D- and Q-sized vector registers b<N> ... q<N> for
// every register code N listed by REGISTER_CODE_LIST. v<N> is defined with
// the same size as q<N>.
#define DEFINE_VREGISTERS(N)  \
const VRegister b##N(N, kBRegSize);  \
const VRegister h##N(N, kHRegSize);  \
const VRegister s##N(N, kSRegSize);  \
const VRegister d##N(N, kDRegSize);  \
const VRegister q##N(N, kQRegSize);  \
const VRegister v##N(N, kQRegSize);
REGISTER_CODE_LIST(DEFINE_VREGISTERS)
#undef DEFINE_VREGISTERS


// Register aliases.
const Register ip0 = x16;
const Register ip1 = x17;
const Register lr = x30;
const Register xzr = x31;
const Register wzr = w31;
403
404
405 // AreAliased returns true if any of the named registers overlap. Arguments
406 // set to NoReg are ignored. The system stack pointer may be specified.
407 bool AreAliased(const CPURegister& reg1,
408 const CPURegister& reg2,
409 const CPURegister& reg3 = NoReg,
410 const CPURegister& reg4 = NoReg,
411 const CPURegister& reg5 = NoReg,
412 const CPURegister& reg6 = NoReg,
413 const CPURegister& reg7 = NoReg,
414 const CPURegister& reg8 = NoReg);
415
416
417 // AreSameSizeAndType returns true if all of the specified registers have the
418 // same size, and are of the same type. The system stack pointer may be
419 // specified. Arguments set to NoReg are ignored, as are any subsequent
420 // arguments. At least one argument (reg1) must be valid (not NoCPUReg).
421 bool AreSameSizeAndType(const CPURegister& reg1,
422 const CPURegister& reg2,
423 const CPURegister& reg3 = NoCPUReg,
424 const CPURegister& reg4 = NoCPUReg,
425 const CPURegister& reg5 = NoCPUReg,
426 const CPURegister& reg6 = NoCPUReg,
427 const CPURegister& reg7 = NoCPUReg,
428 const CPURegister& reg8 = NoCPUReg);
429
430
// AreSameFormat returns true if all of the specified VRegisters have the same
// vector format. Arguments set to NoVReg are ignored, as are any subsequent
// arguments. At least one argument (reg1) must be valid (not NoVReg).
434 bool AreSameFormat(const VRegister& reg1,
435 const VRegister& reg2,
436 const VRegister& reg3 = NoVReg,
437 const VRegister& reg4 = NoVReg);
438
439
// AreConsecutive returns true if all of the specified VRegisters are
// consecutive in the register file. Arguments set to NoVReg are ignored, as
// are any subsequent arguments. At least one argument (reg1) must be valid
// (not NoVReg).
444 bool AreConsecutive(const VRegister& reg1,
445 const VRegister& reg2,
446 const VRegister& reg3 = NoVReg,
447 const VRegister& reg4 = NoVReg);
448
449
450 // Lists of registers.
451 class CPURegList {
452 public:
453 explicit CPURegList(CPURegister reg1,
454 CPURegister reg2 = NoCPUReg,
455 CPURegister reg3 = NoCPUReg,
456 CPURegister reg4 = NoCPUReg)
457 : list_(reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit()),
458 size_(reg1.size()), type_(reg1.type()) {
459 VIXL_ASSERT(AreSameSizeAndType(reg1, reg2, reg3, reg4));
460 VIXL_ASSERT(IsValid());
461 }
462
CPURegList(CPURegister::RegisterType type,unsigned size,RegList list)463 CPURegList(CPURegister::RegisterType type, unsigned size, RegList list)
464 : list_(list), size_(size), type_(type) {
465 VIXL_ASSERT(IsValid());
466 }
467
CPURegList(CPURegister::RegisterType type,unsigned size,unsigned first_reg,unsigned last_reg)468 CPURegList(CPURegister::RegisterType type, unsigned size,
469 unsigned first_reg, unsigned last_reg)
470 : size_(size), type_(type) {
471 VIXL_ASSERT(((type == CPURegister::kRegister) &&
472 (last_reg < kNumberOfRegisters)) ||
473 ((type == CPURegister::kVRegister) &&
474 (last_reg < kNumberOfVRegisters)));
475 VIXL_ASSERT(last_reg >= first_reg);
476 list_ = (UINT64_C(1) << (last_reg + 1)) - 1;
477 list_ &= ~((UINT64_C(1) << first_reg) - 1);
478 VIXL_ASSERT(IsValid());
479 }
480
type()481 CPURegister::RegisterType type() const {
482 VIXL_ASSERT(IsValid());
483 return type_;
484 }
485
486 // Combine another CPURegList into this one. Registers that already exist in
487 // this list are left unchanged. The type and size of the registers in the
488 // 'other' list must match those in this list.
Combine(const CPURegList & other)489 void Combine(const CPURegList& other) {
490 VIXL_ASSERT(IsValid());
491 VIXL_ASSERT(other.type() == type_);
492 VIXL_ASSERT(other.RegisterSizeInBits() == size_);
493 list_ |= other.list();
494 }
495
496 // Remove every register in the other CPURegList from this one. Registers that
497 // do not exist in this list are ignored. The type and size of the registers
498 // in the 'other' list must match those in this list.
Remove(const CPURegList & other)499 void Remove(const CPURegList& other) {
500 VIXL_ASSERT(IsValid());
501 VIXL_ASSERT(other.type() == type_);
502 VIXL_ASSERT(other.RegisterSizeInBits() == size_);
503 list_ &= ~other.list();
504 }
505
506 // Variants of Combine and Remove which take a single register.
Combine(const CPURegister & other)507 void Combine(const CPURegister& other) {
508 VIXL_ASSERT(other.type() == type_);
509 VIXL_ASSERT(other.size() == size_);
510 Combine(other.code());
511 }
512
Remove(const CPURegister & other)513 void Remove(const CPURegister& other) {
514 VIXL_ASSERT(other.type() == type_);
515 VIXL_ASSERT(other.size() == size_);
516 Remove(other.code());
517 }
518
519 // Variants of Combine and Remove which take a single register by its code;
520 // the type and size of the register is inferred from this list.
Combine(int code)521 void Combine(int code) {
522 VIXL_ASSERT(IsValid());
523 VIXL_ASSERT(CPURegister(code, size_, type_).IsValid());
524 list_ |= (UINT64_C(1) << code);
525 }
526
Remove(int code)527 void Remove(int code) {
528 VIXL_ASSERT(IsValid());
529 VIXL_ASSERT(CPURegister(code, size_, type_).IsValid());
530 list_ &= ~(UINT64_C(1) << code);
531 }
532
Union(const CPURegList & list_1,const CPURegList & list_2)533 static CPURegList Union(const CPURegList& list_1, const CPURegList& list_2) {
534 VIXL_ASSERT(list_1.type_ == list_2.type_);
535 VIXL_ASSERT(list_1.size_ == list_2.size_);
536 return CPURegList(list_1.type_, list_1.size_, list_1.list_ | list_2.list_);
537 }
538 static CPURegList Union(const CPURegList& list_1,
539 const CPURegList& list_2,
540 const CPURegList& list_3);
541 static CPURegList Union(const CPURegList& list_1,
542 const CPURegList& list_2,
543 const CPURegList& list_3,
544 const CPURegList& list_4);
545
Intersection(const CPURegList & list_1,const CPURegList & list_2)546 static CPURegList Intersection(const CPURegList& list_1,
547 const CPURegList& list_2) {
548 VIXL_ASSERT(list_1.type_ == list_2.type_);
549 VIXL_ASSERT(list_1.size_ == list_2.size_);
550 return CPURegList(list_1.type_, list_1.size_, list_1.list_ & list_2.list_);
551 }
552 static CPURegList Intersection(const CPURegList& list_1,
553 const CPURegList& list_2,
554 const CPURegList& list_3);
555 static CPURegList Intersection(const CPURegList& list_1,
556 const CPURegList& list_2,
557 const CPURegList& list_3,
558 const CPURegList& list_4);
559
Overlaps(const CPURegList & other)560 bool Overlaps(const CPURegList& other) const {
561 return (type_ == other.type_) && ((list_ & other.list_) != 0);
562 }
563
list()564 RegList list() const {
565 VIXL_ASSERT(IsValid());
566 return list_;
567 }
568
set_list(RegList new_list)569 void set_list(RegList new_list) {
570 VIXL_ASSERT(IsValid());
571 list_ = new_list;
572 }
573
574 // Remove all callee-saved registers from the list. This can be useful when
575 // preparing registers for an AAPCS64 function call, for example.
576 void RemoveCalleeSaved();
577
578 CPURegister PopLowestIndex();
579 CPURegister PopHighestIndex();
580
581 // AAPCS64 callee-saved registers.
582 static CPURegList GetCalleeSaved(unsigned size = kXRegSize);
583 static CPURegList GetCalleeSavedV(unsigned size = kDRegSize);
584
585 // AAPCS64 caller-saved registers. Note that this includes lr.
586 // TODO(all): Determine how we handle d8-d15 being callee-saved, but the top
587 // 64-bits being caller-saved.
588 static CPURegList GetCallerSaved(unsigned size = kXRegSize);
589 static CPURegList GetCallerSavedV(unsigned size = kDRegSize);
590
IsEmpty()591 bool IsEmpty() const {
592 VIXL_ASSERT(IsValid());
593 return list_ == 0;
594 }
595
IncludesAliasOf(const CPURegister & other)596 bool IncludesAliasOf(const CPURegister& other) const {
597 VIXL_ASSERT(IsValid());
598 return (type_ == other.type()) && ((other.Bit() & list_) != 0);
599 }
600
IncludesAliasOf(int code)601 bool IncludesAliasOf(int code) const {
602 VIXL_ASSERT(IsValid());
603 return ((code & list_) != 0);
604 }
605
Count()606 int Count() const {
607 VIXL_ASSERT(IsValid());
608 return CountSetBits(list_);
609 }
610
RegisterSizeInBits()611 unsigned RegisterSizeInBits() const {
612 VIXL_ASSERT(IsValid());
613 return size_;
614 }
615
RegisterSizeInBytes()616 unsigned RegisterSizeInBytes() const {
617 int size_in_bits = RegisterSizeInBits();
618 VIXL_ASSERT((size_in_bits % 8) == 0);
619 return size_in_bits / 8;
620 }
621
TotalSizeInBytes()622 unsigned TotalSizeInBytes() const {
623 VIXL_ASSERT(IsValid());
624 return RegisterSizeInBytes() * Count();
625 }
626
627 private:
628 RegList list_;
629 unsigned size_;
630 CPURegister::RegisterType type_;
631
632 bool IsValid() const;
633 };
634
635
636 // AAPCS64 callee-saved registers.
637 extern const CPURegList kCalleeSaved;
638 extern const CPURegList kCalleeSavedV;
639
640
641 // AAPCS64 caller-saved registers. Note that this includes lr.
642 extern const CPURegList kCallerSaved;
643 extern const CPURegList kCallerSavedV;
644
645
646 // Operand.
647 class Operand {
648 public:
649 // #<immediate>
650 // where <immediate> is int64_t.
651 // This is allowed to be an implicit constructor because Operand is
652 // a wrapper class that doesn't normally perform any type conversion.
653 Operand(int64_t immediate = 0); // NOLINT(runtime/explicit)
654
655 // rm, {<shift> #<shift_amount>}
656 // where <shift> is one of {LSL, LSR, ASR, ROR}.
657 // <shift_amount> is uint6_t.
658 // This is allowed to be an implicit constructor because Operand is
659 // a wrapper class that doesn't normally perform any type conversion.
660 Operand(Register reg,
661 Shift shift = LSL,
662 unsigned shift_amount = 0); // NOLINT(runtime/explicit)
663
664 // rm, {<extend> {#<shift_amount>}}
665 // where <extend> is one of {UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX}.
666 // <shift_amount> is uint2_t.
667 explicit Operand(Register reg, Extend extend, unsigned shift_amount = 0);
668
669 bool IsImmediate() const;
670 bool IsShiftedRegister() const;
671 bool IsExtendedRegister() const;
672 bool IsZero() const;
673
674 // This returns an LSL shift (<= 4) operand as an equivalent extend operand,
675 // which helps in the encoding of instructions that use the stack pointer.
676 Operand ToExtendedRegister() const;
677
immediate()678 int64_t immediate() const {
679 VIXL_ASSERT(IsImmediate());
680 return immediate_;
681 }
682
reg()683 Register reg() const {
684 VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister());
685 return reg_;
686 }
687
shift()688 Shift shift() const {
689 VIXL_ASSERT(IsShiftedRegister());
690 return shift_;
691 }
692
extend()693 Extend extend() const {
694 VIXL_ASSERT(IsExtendedRegister());
695 return extend_;
696 }
697
shift_amount()698 unsigned shift_amount() const {
699 VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister());
700 return shift_amount_;
701 }
702
703 private:
704 int64_t immediate_;
705 Register reg_;
706 Shift shift_;
707 Extend extend_;
708 unsigned shift_amount_;
709 };
710
711
712 // MemOperand represents the addressing mode of a load or store instruction.
713 class MemOperand {
714 public:
715 explicit MemOperand(Register base,
716 int64_t offset = 0,
717 AddrMode addrmode = Offset);
718 MemOperand(Register base,
719 Register regoffset,
720 Shift shift = LSL,
721 unsigned shift_amount = 0);
722 MemOperand(Register base,
723 Register regoffset,
724 Extend extend,
725 unsigned shift_amount = 0);
726 MemOperand(Register base,
727 const Operand& offset,
728 AddrMode addrmode = Offset);
729
base()730 const Register& base() const { return base_; }
regoffset()731 const Register& regoffset() const { return regoffset_; }
offset()732 int64_t offset() const { return offset_; }
addrmode()733 AddrMode addrmode() const { return addrmode_; }
shift()734 Shift shift() const { return shift_; }
extend()735 Extend extend() const { return extend_; }
shift_amount()736 unsigned shift_amount() const { return shift_amount_; }
737 bool IsImmediateOffset() const;
738 bool IsRegisterOffset() const;
739 bool IsPreIndex() const;
740 bool IsPostIndex() const;
741
742 void AddOffset(int64_t offset);
743
744 private:
745 Register base_;
746 Register regoffset_;
747 int64_t offset_;
748 AddrMode addrmode_;
749 Shift shift_;
750 Extend extend_;
751 unsigned shift_amount_;
752 };
753
754
755 class LabelTestHelper; // Forward declaration.
756
757
758 class Label {
759 public:
Label()760 Label() : location_(kLocationUnbound) {}
~Label()761 ~Label() {
762 // If the label has been linked to, it needs to be bound to a target.
763 VIXL_ASSERT(!IsLinked() || IsBound());
764 }
765
IsBound()766 bool IsBound() const { return location_ >= 0; }
IsLinked()767 bool IsLinked() const { return !links_.empty(); }
768
location()769 ptrdiff_t location() const { return location_; }
770
771 static const int kNPreallocatedLinks = 4;
772 static const ptrdiff_t kInvalidLinkKey = PTRDIFF_MAX;
773 static const size_t kReclaimFrom = 512;
774 static const size_t kReclaimFactor = 2;
775
776 typedef InvalSet<ptrdiff_t,
777 kNPreallocatedLinks,
778 ptrdiff_t,
779 kInvalidLinkKey,
780 kReclaimFrom,
781 kReclaimFactor> LinksSetBase;
782 typedef InvalSetIterator<LinksSetBase> LabelLinksIteratorBase;
783
784 private:
785 class LinksSet : public LinksSetBase {
786 public:
LinksSet()787 LinksSet() : LinksSetBase() {}
788 };
789
790 // Allows iterating over the links of a label. The behaviour is undefined if
791 // the list of links is modified in any way while iterating.
792 class LabelLinksIterator : public LabelLinksIteratorBase {
793 public:
LabelLinksIterator(Label * label)794 explicit LabelLinksIterator(Label* label)
795 : LabelLinksIteratorBase(&label->links_) {}
796 };
797
Bind(ptrdiff_t location)798 void Bind(ptrdiff_t location) {
799 // Labels can only be bound once.
800 VIXL_ASSERT(!IsBound());
801 location_ = location;
802 }
803
AddLink(ptrdiff_t instruction)804 void AddLink(ptrdiff_t instruction) {
805 // If a label is bound, the assembler already has the information it needs
806 // to write the instruction, so there is no need to add it to links_.
807 VIXL_ASSERT(!IsBound());
808 links_.insert(instruction);
809 }
810
DeleteLink(ptrdiff_t instruction)811 void DeleteLink(ptrdiff_t instruction) {
812 links_.erase(instruction);
813 }
814
ClearAllLinks()815 void ClearAllLinks() {
816 links_.clear();
817 }
818
819 // TODO: The comment below considers average case complexity for our
820 // usual use-cases. The elements of interest are:
821 // - Branches to a label are emitted in order: branch instructions to a label
822 // are generated at an offset in the code generation buffer greater than any
823 // other branch to that same label already generated. As an example, this can
824 // be broken when an instruction is patched to become a branch. Note that the
825 // code will still work, but the complexity considerations below may locally
826 // not apply any more.
827 // - Veneers are generated in order: for multiple branches of the same type
828 // branching to the same unbound label going out of range, veneers are
829 // generated in growing order of the branch instruction offset from the start
830 // of the buffer.
831 //
832 // When creating a veneer for a branch going out of range, the link for this
833 // branch needs to be removed from this `links_`. Since all branches are
834 // tracked in one underlying InvalSet, the complexity for this deletion is the
835 // same as for finding the element, ie. O(n), where n is the number of links
836 // in the set.
837 // This could be reduced to O(1) by using the same trick as used when tracking
838 // branch information for veneers: split the container to use one set per type
839 // of branch. With that setup, when a veneer is created and the link needs to
840 // be deleted, if the two points above hold, it must be the minimum element of
841 // the set for its type of branch, and that minimum element will be accessible
842 // in O(1).
843
844 // The offsets of the instructions that have linked to this label.
845 LinksSet links_;
846 // The label location.
847 ptrdiff_t location_;
848
849 static const ptrdiff_t kLocationUnbound = -1;
850
851 // It is not safe to copy labels, so disable the copy constructor and operator
852 // by declaring them private (without an implementation).
853 Label(const Label&);
854 void operator=(const Label&);
855
856 // The Assembler class is responsible for binding and linking labels, since
857 // the stored offsets need to be consistent with the Assembler's buffer.
858 friend class Assembler;
859 // The MacroAssembler and VeneerPool handle resolution of branches to distant
860 // targets.
861 friend class MacroAssembler;
862 friend class VeneerPool;
863 };
864
865
866 // Required InvalSet template specialisations.
867 #define INVAL_SET_TEMPLATE_PARAMETERS \
868 ptrdiff_t, \
869 Label::kNPreallocatedLinks, \
870 ptrdiff_t, \
871 Label::kInvalidLinkKey, \
872 Label::kReclaimFrom, \
873 Label::kReclaimFactor
874 template<>
Key(const ptrdiff_t & element)875 inline ptrdiff_t InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::Key(
876 const ptrdiff_t& element) {
877 return element;
878 }
879 template<>
SetKey(ptrdiff_t * element,ptrdiff_t key)880 inline void InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::SetKey(
881 ptrdiff_t* element, ptrdiff_t key) {
882 *element = key;
883 }
884 #undef INVAL_SET_TEMPLATE_PARAMETERS
885
886
// A literal is a 32-bit, 64-bit or 128-bit piece of data stored in the
// instruction stream and loaded through a pc relative load. The same literal
// can be referred to by multiple instructions but a literal can only reside at
// one place in memory. A literal can be used by a load before or after being
// placed in memory.
//
// Internally an offset of 0 is associated with a literal which has been
// neither used nor placed. Then two possibilities arise:
//  1) the literal is placed, the offset (stored as offset + 1) is used to
//     resolve any subsequent load using the literal.
//  2) the literal is not placed and offset is the offset of the last load using
//     the literal (stored as -offset -1). If multiple loads refer to this
//     literal then the last load holds the offset of the preceding load and
//     all loads form a chain. Once the literal is placed all the loads in the
//     chain are resolved and future loads fall back to possibility 1.
902 class RawLiteral {
903 public:
RawLiteral()904 RawLiteral() : size_(0), offset_(0), low64_(0), high64_(0) {}
905
size()906 size_t size() {
907 VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
908 VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
909 VIXL_ASSERT((size_ == kXRegSizeInBytes) ||
910 (size_ == kWRegSizeInBytes) ||
911 (size_ == kQRegSizeInBytes));
912 return size_;
913 }
raw_value128_low64()914 uint64_t raw_value128_low64() {
915 VIXL_ASSERT(size_ == kQRegSizeInBytes);
916 return low64_;
917 }
raw_value128_high64()918 uint64_t raw_value128_high64() {
919 VIXL_ASSERT(size_ == kQRegSizeInBytes);
920 return high64_;
921 }
raw_value64()922 uint64_t raw_value64() {
923 VIXL_ASSERT(size_ == kXRegSizeInBytes);
924 VIXL_ASSERT(high64_ == 0);
925 return low64_;
926 }
raw_value32()927 uint32_t raw_value32() {
928 VIXL_ASSERT(size_ == kWRegSizeInBytes);
929 VIXL_ASSERT(high64_ == 0);
930 VIXL_ASSERT(is_uint32(low64_) || is_int32(low64_));
931 return static_cast<uint32_t>(low64_);
932 }
IsUsed()933 bool IsUsed() { return offset_ < 0; }
IsPlaced()934 bool IsPlaced() { return offset_ > 0; }
935
936 protected:
offset()937 ptrdiff_t offset() {
938 VIXL_ASSERT(IsPlaced());
939 return offset_ - 1;
940 }
set_offset(ptrdiff_t offset)941 void set_offset(ptrdiff_t offset) {
942 VIXL_ASSERT(offset >= 0);
943 VIXL_ASSERT(IsWordAligned(offset));
944 VIXL_ASSERT(!IsPlaced());
945 offset_ = offset + 1;
946 }
last_use()947 ptrdiff_t last_use() {
948 VIXL_ASSERT(IsUsed());
949 return -offset_ - 1;
950 }
set_last_use(ptrdiff_t offset)951 void set_last_use(ptrdiff_t offset) {
952 VIXL_ASSERT(offset >= 0);
953 VIXL_ASSERT(IsWordAligned(offset));
954 VIXL_ASSERT(!IsPlaced());
955 offset_ = -offset - 1;
956 }
957
958 size_t size_;
959 ptrdiff_t offset_;
960 uint64_t low64_;
961 uint64_t high64_;
962
963 friend class Assembler;
964 };
965
966
967 template <typename T>
968 class Literal : public RawLiteral {
969 public:
Literal(T value)970 explicit Literal(T value) {
971 VIXL_STATIC_ASSERT(sizeof(T) <= kXRegSizeInBytes);
972 size_ = sizeof(value);
973 memcpy(&low64_, &value, sizeof(value));
974 }
975
Literal(T high64,T low64)976 Literal(T high64, T low64) {
977 VIXL_STATIC_ASSERT(sizeof(T) == (kQRegSizeInBytes / 2));
978 size_ = kQRegSizeInBytes;
979 memcpy(&low64_, &low64, sizeof(low64));
980 memcpy(&high64_, &high64, sizeof(high64));
981 }
982 };
983
984
985 // Control whether or not position-independent code should be emitted.
enum PositionIndependentCodeOption {
  // Emit only position-independent code: every branch and every reference to
  // a label created with the Label class uses PC-relative addressing.
  PositionIndependentCode,

  // Permit code that refers to absolute addresses. Such code cannot be copied
  // and run from another address, so it has to be generated in its final
  // location.
  PositionDependentCode,

  // Permit VIXL to assume that the bottom 12 bits of an address stay constant
  // while the top 48 bits may change. This lets `adrp` work on systems which
  // copy code between pages but otherwise keep 4KB page alignment.
  PageOffsetDependentCode
};
1003
1004
1005 // Control how scaled- and unscaled-offset loads and stores are generated.
enum LoadStoreScalingOption {
  // Use a scaled-immediate-offset instruction when possible; otherwise fall
  // back to an unscaled-offset, register-offset, pre-index or post-index
  // instruction.
  PreferScaledOffset,

  // Use an unscaled-immediate-offset instruction when possible; otherwise fall
  // back to a scaled-offset, register-offset, pre-index or post-index
  // instruction.
  PreferUnscaledOffset,

  // Only a scaled-immediate-offset instruction is acceptable.
  RequireScaledOffset,

  // Only an unscaled-immediate-offset instruction is acceptable.
  RequireUnscaledOffset
};
1021
1022
1023 // Assembler.
1024 class Assembler {
1025 public:
1026 Assembler(size_t capacity,
1027 PositionIndependentCodeOption pic = PositionIndependentCode);
1028 Assembler(byte* buffer, size_t capacity,
1029 PositionIndependentCodeOption pic = PositionIndependentCode);
1030
1031 // The destructor asserts that one of the following is true:
1032 // * The Assembler object has not been used.
1033 // * Nothing has been emitted since the last Reset() call.
1034 // * Nothing has been emitted since the last FinalizeCode() call.
1035 ~Assembler();
1036
1037 // System functions.
1038
1039 // Start generating code from the beginning of the buffer, discarding any code
1040 // and data that has already been emitted into the buffer.
1041 void Reset();
1042
1043 // Finalize a code buffer of generated instructions. This function must be
1044 // called before executing or copying code from the buffer.
1045 void FinalizeCode();
1046
1047 // Label.
1048 // Bind a label to the current PC.
1049 void bind(Label* label);
1050
1051 // Bind a label to a specified offset from the start of the buffer.
1052 void BindToOffset(Label* label, ptrdiff_t offset);
1053
1054 // Place a literal at the current PC.
1055 void place(RawLiteral* literal);
1056
CursorOffset()1057 ptrdiff_t CursorOffset() const {
1058 return buffer_->CursorOffset();
1059 }
1060
BufferEndOffset()1061 ptrdiff_t BufferEndOffset() const {
1062 return static_cast<ptrdiff_t>(buffer_->capacity());
1063 }
1064
1065 // Return the address of an offset in the buffer.
1066 template <typename T>
GetOffsetAddress(ptrdiff_t offset)1067 T GetOffsetAddress(ptrdiff_t offset) {
1068 VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
1069 return buffer_->GetOffsetAddress<T>(offset);
1070 }
1071
1072 // Return the address of a bound label.
1073 template <typename T>
GetLabelAddress(const Label * label)1074 T GetLabelAddress(const Label * label) {
1075 VIXL_ASSERT(label->IsBound());
1076 VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
1077 return GetOffsetAddress<T>(label->location());
1078 }
1079
1080 // Return the address of the cursor.
1081 template <typename T>
GetCursorAddress()1082 T GetCursorAddress() {
1083 VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
1084 return GetOffsetAddress<T>(CursorOffset());
1085 }
1086
1087 // Return the address of the start of the buffer.
1088 template <typename T>
GetStartAddress()1089 T GetStartAddress() {
1090 VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
1091 return GetOffsetAddress<T>(0);
1092 }
1093
InstructionAt(ptrdiff_t instruction_offset)1094 Instruction* InstructionAt(ptrdiff_t instruction_offset) {
1095 return GetOffsetAddress<Instruction*>(instruction_offset);
1096 }
1097
InstructionOffset(Instruction * instruction)1098 ptrdiff_t InstructionOffset(Instruction* instruction) {
1099 VIXL_STATIC_ASSERT(sizeof(*instruction) == 1);
1100 ptrdiff_t offset = instruction - GetStartAddress<Instruction*>();
1101 VIXL_ASSERT((0 <= offset) &&
1102 (offset < static_cast<ptrdiff_t>(BufferCapacity())));
1103 return offset;
1104 }
1105
1106 // Instruction set functions.
1107
1108 // Branch / Jump instructions.
1109 // Branch to register.
1110 void br(const Register& xn);
1111
1112 // Branch with link to register.
1113 void blr(const Register& xn);
1114
1115 // Branch to register with return hint.
1116 void ret(const Register& xn = lr);
1117
1118 // Unconditional branch to label.
1119 void b(Label* label);
1120
1121 // Conditional branch to label.
1122 void b(Label* label, Condition cond);
1123
1124 // Unconditional branch to PC offset.
1125 void b(int imm26);
1126
1127 // Conditional branch to PC offset.
1128 void b(int imm19, Condition cond);
1129
1130 // Branch with link to label.
1131 void bl(Label* label);
1132
1133 // Branch with link to PC offset.
1134 void bl(int imm26);
1135
1136 // Compare and branch to label if zero.
1137 void cbz(const Register& rt, Label* label);
1138
1139 // Compare and branch to PC offset if zero.
1140 void cbz(const Register& rt, int imm19);
1141
1142 // Compare and branch to label if not zero.
1143 void cbnz(const Register& rt, Label* label);
1144
1145 // Compare and branch to PC offset if not zero.
1146 void cbnz(const Register& rt, int imm19);
1147
1148 // Table lookup from one register.
1149 void tbl(const VRegister& vd,
1150 const VRegister& vn,
1151 const VRegister& vm);
1152
1153 // Table lookup from two registers.
1154 void tbl(const VRegister& vd,
1155 const VRegister& vn,
1156 const VRegister& vn2,
1157 const VRegister& vm);
1158
1159 // Table lookup from three registers.
1160 void tbl(const VRegister& vd,
1161 const VRegister& vn,
1162 const VRegister& vn2,
1163 const VRegister& vn3,
1164 const VRegister& vm);
1165
1166 // Table lookup from four registers.
1167 void tbl(const VRegister& vd,
1168 const VRegister& vn,
1169 const VRegister& vn2,
1170 const VRegister& vn3,
1171 const VRegister& vn4,
1172 const VRegister& vm);
1173
1174 // Table lookup extension from one register.
1175 void tbx(const VRegister& vd,
1176 const VRegister& vn,
1177 const VRegister& vm);
1178
1179 // Table lookup extension from two registers.
1180 void tbx(const VRegister& vd,
1181 const VRegister& vn,
1182 const VRegister& vn2,
1183 const VRegister& vm);
1184
1185 // Table lookup extension from three registers.
1186 void tbx(const VRegister& vd,
1187 const VRegister& vn,
1188 const VRegister& vn2,
1189 const VRegister& vn3,
1190 const VRegister& vm);
1191
1192 // Table lookup extension from four registers.
1193 void tbx(const VRegister& vd,
1194 const VRegister& vn,
1195 const VRegister& vn2,
1196 const VRegister& vn3,
1197 const VRegister& vn4,
1198 const VRegister& vm);
1199
1200 // Test bit and branch to label if zero.
1201 void tbz(const Register& rt, unsigned bit_pos, Label* label);
1202
1203 // Test bit and branch to PC offset if zero.
1204 void tbz(const Register& rt, unsigned bit_pos, int imm14);
1205
1206 // Test bit and branch to label if not zero.
1207 void tbnz(const Register& rt, unsigned bit_pos, Label* label);
1208
1209 // Test bit and branch to PC offset if not zero.
1210 void tbnz(const Register& rt, unsigned bit_pos, int imm14);
1211
1212 // Address calculation instructions.
1213 // Calculate a PC-relative address. Unlike for branches the offset in adr is
1214 // unscaled (i.e. the result can be unaligned).
1215
1216 // Calculate the address of a label.
1217 void adr(const Register& rd, Label* label);
1218
1219 // Calculate the address of a PC offset.
1220 void adr(const Register& rd, int imm21);
1221
1222 // Calculate the page address of a label.
1223 void adrp(const Register& rd, Label* label);
1224
1225 // Calculate the page address of a PC offset.
1226 void adrp(const Register& rd, int imm21);
1227
1228 // Data Processing instructions.
1229 // Add.
1230 void add(const Register& rd,
1231 const Register& rn,
1232 const Operand& operand);
1233
1234 // Add and update status flags.
1235 void adds(const Register& rd,
1236 const Register& rn,
1237 const Operand& operand);
1238
1239 // Compare negative.
1240 void cmn(const Register& rn, const Operand& operand);
1241
1242 // Subtract.
1243 void sub(const Register& rd,
1244 const Register& rn,
1245 const Operand& operand);
1246
1247 // Subtract and update status flags.
1248 void subs(const Register& rd,
1249 const Register& rn,
1250 const Operand& operand);
1251
1252 // Compare.
1253 void cmp(const Register& rn, const Operand& operand);
1254
1255 // Negate.
1256 void neg(const Register& rd,
1257 const Operand& operand);
1258
1259 // Negate and update status flags.
1260 void negs(const Register& rd,
1261 const Operand& operand);
1262
1263 // Add with carry bit.
1264 void adc(const Register& rd,
1265 const Register& rn,
1266 const Operand& operand);
1267
1268 // Add with carry bit and update status flags.
1269 void adcs(const Register& rd,
1270 const Register& rn,
1271 const Operand& operand);
1272
1273 // Subtract with carry bit.
1274 void sbc(const Register& rd,
1275 const Register& rn,
1276 const Operand& operand);
1277
1278 // Subtract with carry bit and update status flags.
1279 void sbcs(const Register& rd,
1280 const Register& rn,
1281 const Operand& operand);
1282
1283 // Negate with carry bit.
1284 void ngc(const Register& rd,
1285 const Operand& operand);
1286
1287 // Negate with carry bit and update status flags.
1288 void ngcs(const Register& rd,
1289 const Operand& operand);
1290
1291 // Logical instructions.
1292 // Bitwise and (A & B).
1293 void and_(const Register& rd,
1294 const Register& rn,
1295 const Operand& operand);
1296
1297 // Bitwise and (A & B) and update status flags.
1298 void ands(const Register& rd,
1299 const Register& rn,
1300 const Operand& operand);
1301
1302 // Bit test and set flags.
1303 void tst(const Register& rn, const Operand& operand);
1304
1305 // Bit clear (A & ~B).
1306 void bic(const Register& rd,
1307 const Register& rn,
1308 const Operand& operand);
1309
1310 // Bit clear (A & ~B) and update status flags.
1311 void bics(const Register& rd,
1312 const Register& rn,
1313 const Operand& operand);
1314
1315 // Bitwise or (A | B).
1316 void orr(const Register& rd, const Register& rn, const Operand& operand);
1317
// Bitwise or-not (A | ~B).
1319 void orn(const Register& rd, const Register& rn, const Operand& operand);
1320
1321 // Bitwise eor/xor (A ^ B).
1322 void eor(const Register& rd, const Register& rn, const Operand& operand);
1323
1324 // Bitwise enor/xnor (A ^ ~B).
1325 void eon(const Register& rd, const Register& rn, const Operand& operand);
1326
1327 // Logical shift left by variable.
1328 void lslv(const Register& rd, const Register& rn, const Register& rm);
1329
1330 // Logical shift right by variable.
1331 void lsrv(const Register& rd, const Register& rn, const Register& rm);
1332
1333 // Arithmetic shift right by variable.
1334 void asrv(const Register& rd, const Register& rn, const Register& rm);
1335
1336 // Rotate right by variable.
1337 void rorv(const Register& rd, const Register& rn, const Register& rm);
1338
1339 // Bitfield instructions.
1340 // Bitfield move.
1341 void bfm(const Register& rd,
1342 const Register& rn,
1343 unsigned immr,
1344 unsigned imms);
1345
1346 // Signed bitfield move.
1347 void sbfm(const Register& rd,
1348 const Register& rn,
1349 unsigned immr,
1350 unsigned imms);
1351
1352 // Unsigned bitfield move.
1353 void ubfm(const Register& rd,
1354 const Register& rn,
1355 unsigned immr,
1356 unsigned imms);
1357
1358 // Bfm aliases.
1359 // Bitfield insert.
bfi(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1360 void bfi(const Register& rd,
1361 const Register& rn,
1362 unsigned lsb,
1363 unsigned width) {
1364 VIXL_ASSERT(width >= 1);
1365 VIXL_ASSERT(lsb + width <= rn.size());
1366 bfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
1367 }
1368
1369 // Bitfield extract and insert low.
bfxil(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1370 void bfxil(const Register& rd,
1371 const Register& rn,
1372 unsigned lsb,
1373 unsigned width) {
1374 VIXL_ASSERT(width >= 1);
1375 VIXL_ASSERT(lsb + width <= rn.size());
1376 bfm(rd, rn, lsb, lsb + width - 1);
1377 }
1378
1379 // Sbfm aliases.
1380 // Arithmetic shift right.
asr(const Register & rd,const Register & rn,unsigned shift)1381 void asr(const Register& rd, const Register& rn, unsigned shift) {
1382 VIXL_ASSERT(shift < rd.size());
1383 sbfm(rd, rn, shift, rd.size() - 1);
1384 }
1385
1386 // Signed bitfield insert with zero at right.
sbfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1387 void sbfiz(const Register& rd,
1388 const Register& rn,
1389 unsigned lsb,
1390 unsigned width) {
1391 VIXL_ASSERT(width >= 1);
1392 VIXL_ASSERT(lsb + width <= rn.size());
1393 sbfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
1394 }
1395
1396 // Signed bitfield extract.
sbfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1397 void sbfx(const Register& rd,
1398 const Register& rn,
1399 unsigned lsb,
1400 unsigned width) {
1401 VIXL_ASSERT(width >= 1);
1402 VIXL_ASSERT(lsb + width <= rn.size());
1403 sbfm(rd, rn, lsb, lsb + width - 1);
1404 }
1405
1406 // Signed extend byte.
sxtb(const Register & rd,const Register & rn)1407 void sxtb(const Register& rd, const Register& rn) {
1408 sbfm(rd, rn, 0, 7);
1409 }
1410
1411 // Signed extend halfword.
sxth(const Register & rd,const Register & rn)1412 void sxth(const Register& rd, const Register& rn) {
1413 sbfm(rd, rn, 0, 15);
1414 }
1415
1416 // Signed extend word.
sxtw(const Register & rd,const Register & rn)1417 void sxtw(const Register& rd, const Register& rn) {
1418 sbfm(rd, rn, 0, 31);
1419 }
1420
1421 // Ubfm aliases.
1422 // Logical shift left.
lsl(const Register & rd,const Register & rn,unsigned shift)1423 void lsl(const Register& rd, const Register& rn, unsigned shift) {
1424 unsigned reg_size = rd.size();
1425 VIXL_ASSERT(shift < reg_size);
1426 ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
1427 }
1428
1429 // Logical shift right.
lsr(const Register & rd,const Register & rn,unsigned shift)1430 void lsr(const Register& rd, const Register& rn, unsigned shift) {
1431 VIXL_ASSERT(shift < rd.size());
1432 ubfm(rd, rn, shift, rd.size() - 1);
1433 }
1434
1435 // Unsigned bitfield insert with zero at right.
ubfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1436 void ubfiz(const Register& rd,
1437 const Register& rn,
1438 unsigned lsb,
1439 unsigned width) {
1440 VIXL_ASSERT(width >= 1);
1441 VIXL_ASSERT(lsb + width <= rn.size());
1442 ubfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
1443 }
1444
1445 // Unsigned bitfield extract.
ubfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1446 void ubfx(const Register& rd,
1447 const Register& rn,
1448 unsigned lsb,
1449 unsigned width) {
1450 VIXL_ASSERT(width >= 1);
1451 VIXL_ASSERT(lsb + width <= rn.size());
1452 ubfm(rd, rn, lsb, lsb + width - 1);
1453 }
1454
1455 // Unsigned extend byte.
uxtb(const Register & rd,const Register & rn)1456 void uxtb(const Register& rd, const Register& rn) {
1457 ubfm(rd, rn, 0, 7);
1458 }
1459
1460 // Unsigned extend halfword.
uxth(const Register & rd,const Register & rn)1461 void uxth(const Register& rd, const Register& rn) {
1462 ubfm(rd, rn, 0, 15);
1463 }
1464
1465 // Unsigned extend word.
uxtw(const Register & rd,const Register & rn)1466 void uxtw(const Register& rd, const Register& rn) {
1467 ubfm(rd, rn, 0, 31);
1468 }
1469
1470 // Extract.
1471 void extr(const Register& rd,
1472 const Register& rn,
1473 const Register& rm,
1474 unsigned lsb);
1475
1476 // Conditional select: rd = cond ? rn : rm.
1477 void csel(const Register& rd,
1478 const Register& rn,
1479 const Register& rm,
1480 Condition cond);
1481
1482 // Conditional select increment: rd = cond ? rn : rm + 1.
1483 void csinc(const Register& rd,
1484 const Register& rn,
1485 const Register& rm,
1486 Condition cond);
1487
1488 // Conditional select inversion: rd = cond ? rn : ~rm.
1489 void csinv(const Register& rd,
1490 const Register& rn,
1491 const Register& rm,
1492 Condition cond);
1493
1494 // Conditional select negation: rd = cond ? rn : -rm.
1495 void csneg(const Register& rd,
1496 const Register& rn,
1497 const Register& rm,
1498 Condition cond);
1499
1500 // Conditional set: rd = cond ? 1 : 0.
1501 void cset(const Register& rd, Condition cond);
1502
1503 // Conditional set mask: rd = cond ? -1 : 0.
1504 void csetm(const Register& rd, Condition cond);
1505
1506 // Conditional increment: rd = cond ? rn + 1 : rn.
1507 void cinc(const Register& rd, const Register& rn, Condition cond);
1508
1509 // Conditional invert: rd = cond ? ~rn : rn.
1510 void cinv(const Register& rd, const Register& rn, Condition cond);
1511
1512 // Conditional negate: rd = cond ? -rn : rn.
1513 void cneg(const Register& rd, const Register& rn, Condition cond);
1514
1515 // Rotate right.
ror(const Register & rd,const Register & rs,unsigned shift)1516 void ror(const Register& rd, const Register& rs, unsigned shift) {
1517 extr(rd, rs, rs, shift);
1518 }
1519
1520 // Conditional comparison.
1521 // Conditional compare negative.
1522 void ccmn(const Register& rn,
1523 const Operand& operand,
1524 StatusFlags nzcv,
1525 Condition cond);
1526
1527 // Conditional compare.
1528 void ccmp(const Register& rn,
1529 const Operand& operand,
1530 StatusFlags nzcv,
1531 Condition cond);
1532
1533 // CRC-32 checksum from byte.
1534 void crc32b(const Register& rd,
1535 const Register& rn,
1536 const Register& rm);
1537
1538 // CRC-32 checksum from half-word.
1539 void crc32h(const Register& rd,
1540 const Register& rn,
1541 const Register& rm);
1542
1543 // CRC-32 checksum from word.
1544 void crc32w(const Register& rd,
1545 const Register& rn,
1546 const Register& rm);
1547
1548 // CRC-32 checksum from double word.
1549 void crc32x(const Register& rd,
1550 const Register& rn,
1551 const Register& rm);
1552
// CRC-32C checksum from byte.
1554 void crc32cb(const Register& rd,
1555 const Register& rn,
1556 const Register& rm);
1557
// CRC-32C checksum from half-word.
1559 void crc32ch(const Register& rd,
1560 const Register& rn,
1561 const Register& rm);
1562
// CRC-32C checksum from word.
1564 void crc32cw(const Register& rd,
1565 const Register& rn,
1566 const Register& rm);
1567
1568 // CRC-32C checksum from double word.
1569 void crc32cx(const Register& rd,
1570 const Register& rn,
1571 const Register& rm);
1572
1573 // Multiply.
1574 void mul(const Register& rd, const Register& rn, const Register& rm);
1575
1576 // Negated multiply.
1577 void mneg(const Register& rd, const Register& rn, const Register& rm);
1578
1579 // Signed long multiply: 32 x 32 -> 64-bit.
1580 void smull(const Register& rd, const Register& rn, const Register& rm);
1581
1582 // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
1583 void smulh(const Register& xd, const Register& xn, const Register& xm);
1584
1585 // Multiply and accumulate.
1586 void madd(const Register& rd,
1587 const Register& rn,
1588 const Register& rm,
1589 const Register& ra);
1590
1591 // Multiply and subtract.
1592 void msub(const Register& rd,
1593 const Register& rn,
1594 const Register& rm,
1595 const Register& ra);
1596
1597 // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
1598 void smaddl(const Register& rd,
1599 const Register& rn,
1600 const Register& rm,
1601 const Register& ra);
1602
1603 // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
1604 void umaddl(const Register& rd,
1605 const Register& rn,
1606 const Register& rm,
1607 const Register& ra);
1608
1609 // Unsigned long multiply: 32 x 32 -> 64-bit.
umull(const Register & rd,const Register & rn,const Register & rm)1610 void umull(const Register& rd,
1611 const Register& rn,
1612 const Register& rm) {
1613 umaddl(rd, rn, rm, xzr);
1614 }
1615
1616 // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
1617 void umulh(const Register& xd,
1618 const Register& xn,
1619 const Register& xm);
1620
1621 // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1622 void smsubl(const Register& rd,
1623 const Register& rn,
1624 const Register& rm,
1625 const Register& ra);
1626
1627 // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1628 void umsubl(const Register& rd,
1629 const Register& rn,
1630 const Register& rm,
1631 const Register& ra);
1632
1633 // Signed integer divide.
1634 void sdiv(const Register& rd, const Register& rn, const Register& rm);
1635
1636 // Unsigned integer divide.
1637 void udiv(const Register& rd, const Register& rn, const Register& rm);
1638
1639 // Bit reverse.
1640 void rbit(const Register& rd, const Register& rn);
1641
1642 // Reverse bytes in 16-bit half words.
1643 void rev16(const Register& rd, const Register& rn);
1644
1645 // Reverse bytes in 32-bit words.
1646 void rev32(const Register& rd, const Register& rn);
1647
1648 // Reverse bytes.
1649 void rev(const Register& rd, const Register& rn);
1650
1651 // Count leading zeroes.
1652 void clz(const Register& rd, const Register& rn);
1653
1654 // Count leading sign bits.
1655 void cls(const Register& rd, const Register& rn);
1656
1657 // Memory instructions.
1658 // Load integer or FP register.
1659 void ldr(const CPURegister& rt, const MemOperand& src,
1660 LoadStoreScalingOption option = PreferScaledOffset);
1661
1662 // Store integer or FP register.
1663 void str(const CPURegister& rt, const MemOperand& dst,
1664 LoadStoreScalingOption option = PreferScaledOffset);
1665
1666 // Load word with sign extension.
1667 void ldrsw(const Register& rt, const MemOperand& src,
1668 LoadStoreScalingOption option = PreferScaledOffset);
1669
1670 // Load byte.
1671 void ldrb(const Register& rt, const MemOperand& src,
1672 LoadStoreScalingOption option = PreferScaledOffset);
1673
1674 // Store byte.
1675 void strb(const Register& rt, const MemOperand& dst,
1676 LoadStoreScalingOption option = PreferScaledOffset);
1677
1678 // Load byte with sign extension.
1679 void ldrsb(const Register& rt, const MemOperand& src,
1680 LoadStoreScalingOption option = PreferScaledOffset);
1681
1682 // Load half-word.
1683 void ldrh(const Register& rt, const MemOperand& src,
1684 LoadStoreScalingOption option = PreferScaledOffset);
1685
1686 // Store half-word.
1687 void strh(const Register& rt, const MemOperand& dst,
1688 LoadStoreScalingOption option = PreferScaledOffset);
1689
1690 // Load half-word with sign extension.
1691 void ldrsh(const Register& rt, const MemOperand& src,
1692 LoadStoreScalingOption option = PreferScaledOffset);
1693
1694 // Load integer or FP register (with unscaled offset).
1695 void ldur(const CPURegister& rt, const MemOperand& src,
1696 LoadStoreScalingOption option = PreferUnscaledOffset);
1697
1698 // Store integer or FP register (with unscaled offset).
1699 void stur(const CPURegister& rt, const MemOperand& src,
1700 LoadStoreScalingOption option = PreferUnscaledOffset);
1701
// Load word with sign extension (and unscaled offset).
1703 void ldursw(const Register& rt, const MemOperand& src,
1704 LoadStoreScalingOption option = PreferUnscaledOffset);
1705
1706 // Load byte (with unscaled offset).
1707 void ldurb(const Register& rt, const MemOperand& src,
1708 LoadStoreScalingOption option = PreferUnscaledOffset);
1709
1710 // Store byte (with unscaled offset).
1711 void sturb(const Register& rt, const MemOperand& dst,
1712 LoadStoreScalingOption option = PreferUnscaledOffset);
1713
1714 // Load byte with sign extension (and unscaled offset).
1715 void ldursb(const Register& rt, const MemOperand& src,
1716 LoadStoreScalingOption option = PreferUnscaledOffset);
1717
1718 // Load half-word (with unscaled offset).
1719 void ldurh(const Register& rt, const MemOperand& src,
1720 LoadStoreScalingOption option = PreferUnscaledOffset);
1721
1722 // Store half-word (with unscaled offset).
1723 void sturh(const Register& rt, const MemOperand& dst,
1724 LoadStoreScalingOption option = PreferUnscaledOffset);
1725
1726 // Load half-word with sign extension (and unscaled offset).
1727 void ldursh(const Register& rt, const MemOperand& src,
1728 LoadStoreScalingOption option = PreferUnscaledOffset);
1729
1730 // Load integer or FP register pair.
1731 void ldp(const CPURegister& rt, const CPURegister& rt2,
1732 const MemOperand& src);
1733
1734 // Store integer or FP register pair.
1735 void stp(const CPURegister& rt, const CPURegister& rt2,
1736 const MemOperand& dst);
1737
1738 // Load word pair with sign extension.
1739 void ldpsw(const Register& rt, const Register& rt2, const MemOperand& src);
1740
1741 // Load integer or FP register pair, non-temporal.
1742 void ldnp(const CPURegister& rt, const CPURegister& rt2,
1743 const MemOperand& src);
1744
1745 // Store integer or FP register pair, non-temporal.
1746 void stnp(const CPURegister& rt, const CPURegister& rt2,
1747 const MemOperand& dst);
1748
1749 // Load integer or FP register from literal pool.
1750 void ldr(const CPURegister& rt, RawLiteral* literal);
1751
1752 // Load word with sign extension from literal pool.
1753 void ldrsw(const Register& rt, RawLiteral* literal);
1754
1755 // Load integer or FP register from pc + imm19 << 2.
1756 void ldr(const CPURegister& rt, int imm19);
1757
1758 // Load word with sign extension from pc + imm19 << 2.
1759 void ldrsw(const Register& rt, int imm19);
1760
1761 // Store exclusive byte.
1762 void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1763
1764 // Store exclusive half-word.
1765 void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1766
1767 // Store exclusive register.
1768 void stxr(const Register& rs, const Register& rt, const MemOperand& dst);
1769
1770 // Load exclusive byte.
1771 void ldxrb(const Register& rt, const MemOperand& src);
1772
1773 // Load exclusive half-word.
1774 void ldxrh(const Register& rt, const MemOperand& src);
1775
1776 // Load exclusive register.
1777 void ldxr(const Register& rt, const MemOperand& src);
1778
1779 // Store exclusive register pair.
1780 void stxp(const Register& rs,
1781 const Register& rt,
1782 const Register& rt2,
1783 const MemOperand& dst);
1784
1785 // Load exclusive register pair.
1786 void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);
1787
1788 // Store-release exclusive byte.
1789 void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1790
1791 // Store-release exclusive half-word.
1792 void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1793
1794 // Store-release exclusive register.
1795 void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);
1796
1797 // Load-acquire exclusive byte.
1798 void ldaxrb(const Register& rt, const MemOperand& src);
1799
1800 // Load-acquire exclusive half-word.
1801 void ldaxrh(const Register& rt, const MemOperand& src);
1802
1803 // Load-acquire exclusive register.
1804 void ldaxr(const Register& rt, const MemOperand& src);
1805
1806 // Store-release exclusive register pair.
1807 void stlxp(const Register& rs,
1808 const Register& rt,
1809 const Register& rt2,
1810 const MemOperand& dst);
1811
1812 // Load-acquire exclusive register pair.
1813 void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);
1814
1815 // Store-release byte.
1816 void stlrb(const Register& rt, const MemOperand& dst);
1817
1818 // Store-release half-word.
1819 void stlrh(const Register& rt, const MemOperand& dst);
1820
1821 // Store-release register.
1822 void stlr(const Register& rt, const MemOperand& dst);
1823
1824 // Load-acquire byte.
1825 void ldarb(const Register& rt, const MemOperand& src);
1826
1827 // Load-acquire half-word.
1828 void ldarh(const Register& rt, const MemOperand& src);
1829
1830 // Load-acquire register.
1831 void ldar(const Register& rt, const MemOperand& src);
1832
1833 // Prefetch memory.
1834 void prfm(PrefetchOperation op, const MemOperand& addr,
1835 LoadStoreScalingOption option = PreferScaledOffset);
1836
1837 // Prefetch memory (with unscaled offset).
1838 void prfum(PrefetchOperation op, const MemOperand& addr,
1839 LoadStoreScalingOption option = PreferUnscaledOffset);
1840
1841 // Prefetch memory in the literal pool.
1842 void prfm(PrefetchOperation op, RawLiteral* literal);
1843
1844 // Prefetch from pc + imm19 << 2.
1845 void prfm(PrefetchOperation op, int imm19);
1846
1847 // Move instructions. The default shift of -1 indicates that the move
1848 // instruction will calculate an appropriate 16-bit immediate and left shift
1849 // that is equal to the 64-bit immediate argument. If an explicit left shift
1850 // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
1851 //
1852 // For movk, an explicit shift can be used to indicate which half word should
1853 // be overwritten, eg. movk(x0, 0, 0) will overwrite the least-significant
1854 // half word with zero, whereas movk(x0, 0, 48) will overwrite the
1855 // most-significant.
1856
1857 // Move immediate and keep.
1858 void movk(const Register& rd, uint64_t imm, int shift = -1) {
1859 MoveWide(rd, imm, shift, MOVK);
1860 }
1861
1862 // Move inverted immediate.
1863 void movn(const Register& rd, uint64_t imm, int shift = -1) {
1864 MoveWide(rd, imm, shift, MOVN);
1865 }
1866
1867 // Move immediate.
1868 void movz(const Register& rd, uint64_t imm, int shift = -1) {
1869 MoveWide(rd, imm, shift, MOVZ);
1870 }
1871
1872 // Misc instructions.
1873 // Monitor debug-mode breakpoint.
1874 void brk(int code);
1875
1876 // Halting debug-mode breakpoint.
1877 void hlt(int code);
1878
1879 // Generate exception targeting EL1.
1880 void svc(int code);
1881
1882 // Move register to register.
1883 void mov(const Register& rd, const Register& rn);
1884
1885 // Move inverted operand to register.
1886 void mvn(const Register& rd, const Operand& operand);
1887
1888 // System instructions.
1889 // Move to register from system register.
1890 void mrs(const Register& rt, SystemRegister sysreg);
1891
1892 // Move from register to system register.
1893 void msr(SystemRegister sysreg, const Register& rt);
1894
1895 // System instruction.
1896 void sys(int op1, int crn, int crm, int op2, const Register& rt = xzr);
1897
1898 // System instruction with pre-encoded op (op1:crn:crm:op2).
1899 void sys(int op, const Register& rt = xzr);
1900
1901 // System data cache operation.
1902 void dc(DataCacheOp op, const Register& rt);
1903
1904 // System instruction cache operation.
1905 void ic(InstructionCacheOp op, const Register& rt);
1906
1907 // System hint.
1908 void hint(SystemHint code);
1909
1910 // Clear exclusive monitor.
1911 void clrex(int imm4 = 0xf);
1912
1913 // Data memory barrier.
1914 void dmb(BarrierDomain domain, BarrierType type);
1915
1916 // Data synchronization barrier.
1917 void dsb(BarrierDomain domain, BarrierType type);
1918
1919 // Instruction synchronization barrier.
1920 void isb();
1921
1922 // Alias for system instructions.
1923 // No-op.
nop()1924 void nop() {
1925 hint(NOP);
1926 }
1927
1928 // FP and NEON instructions.
1929 // Move double precision immediate to FP register.
1930 void fmov(const VRegister& vd, double imm);
1931
1932 // Move single precision immediate to FP register.
1933 void fmov(const VRegister& vd, float imm);
1934
1935 // Move FP register to register.
1936 void fmov(const Register& rd, const VRegister& fn);
1937
1938 // Move register to FP register.
1939 void fmov(const VRegister& vd, const Register& rn);
1940
1941 // Move FP register to FP register.
1942 void fmov(const VRegister& vd, const VRegister& fn);
1943
1944 // Move 64-bit register to top half of 128-bit FP register.
1945 void fmov(const VRegister& vd, int index, const Register& rn);
1946
1947 // Move top half of 128-bit FP register to 64-bit register.
1948 void fmov(const Register& rd, const VRegister& vn, int index);
1949
1950 // FP add.
1951 void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1952
1953 // FP subtract.
1954 void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1955
1956 // FP multiply.
1957 void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1958
1959 // FP fused multiply-add.
1960 void fmadd(const VRegister& vd,
1961 const VRegister& vn,
1962 const VRegister& vm,
1963 const VRegister& va);
1964
1965 // FP fused multiply-subtract.
1966 void fmsub(const VRegister& vd,
1967 const VRegister& vn,
1968 const VRegister& vm,
1969 const VRegister& va);
1970
1971 // FP fused multiply-add and negate.
1972 void fnmadd(const VRegister& vd,
1973 const VRegister& vn,
1974 const VRegister& vm,
1975 const VRegister& va);
1976
1977 // FP fused multiply-subtract and negate.
1978 void fnmsub(const VRegister& vd,
1979 const VRegister& vn,
1980 const VRegister& vm,
1981 const VRegister& va);
1982
1983 // FP multiply-negate scalar.
1984 void fnmul(const VRegister& vd,
1985 const VRegister& vn,
1986 const VRegister& vm);
1987
1988 // FP reciprocal exponent scalar.
1989 void frecpx(const VRegister& vd,
1990 const VRegister& vn);
1991
1992 // FP divide.
1993 void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm);
1994
1995 // FP maximum.
1996 void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm);
1997
1998 // FP minimum.
1999 void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2000
2001 // FP maximum number.
2002 void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2003
2004 // FP minimum number.
2005 void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2006
2007 // FP absolute.
2008 void fabs(const VRegister& vd, const VRegister& vn);
2009
2010 // FP negate.
2011 void fneg(const VRegister& vd, const VRegister& vn);
2012
2013 // FP square root.
2014 void fsqrt(const VRegister& vd, const VRegister& vn);
2015
2016 // FP round to integer, nearest with ties to away.
2017 void frinta(const VRegister& vd, const VRegister& vn);
2018
2019 // FP round to integer, implicit rounding.
2020 void frinti(const VRegister& vd, const VRegister& vn);
2021
2022 // FP round to integer, toward minus infinity.
2023 void frintm(const VRegister& vd, const VRegister& vn);
2024
2025 // FP round to integer, nearest with ties to even.
2026 void frintn(const VRegister& vd, const VRegister& vn);
2027
2028 // FP round to integer, toward plus infinity.
2029 void frintp(const VRegister& vd, const VRegister& vn);
2030
2031 // FP round to integer, exact, implicit rounding.
2032 void frintx(const VRegister& vd, const VRegister& vn);
2033
2034 // FP round to integer, towards zero.
2035 void frintz(const VRegister& vd, const VRegister& vn);
2036
// FP compare helper taking a register and a double immediate.
// NOTE(review): presumably the shared emitter behind the immediate forms of
// fcmp and fcmpe, with `trap` choosing the signaling variant - confirm
// against the definition.
2037 void FPCompareMacro(const VRegister& vn,
2038 double value,
2039 FPTrapFlags trap);
2040
// FP compare helper taking two registers.
// NOTE(review): presumably the shared emitter behind the register forms of
// fcmp and fcmpe, with `trap` choosing the signaling variant - confirm
// against the definition.
2041 void FPCompareMacro(const VRegister& vn,
2042 const VRegister& vm,
2043 FPTrapFlags trap);
2044
2045 // FP compare registers.
2046 void fcmp(const VRegister& vn, const VRegister& vm);
2047
2048 // FP compare immediate.
2049 void fcmp(const VRegister& vn, double value);
2050
// FP conditional compare helper; takes the same operands as fccmp/fccmpe
// plus a trap selector.
// NOTE(review): presumably the shared emitter behind fccmp and fccmpe, with
// `trap` choosing the signaling variant - confirm against the definition.
2051 void FPCCompareMacro(const VRegister& vn,
2052 const VRegister& vm,
2053 StatusFlags nzcv,
2054 Condition cond,
2055 FPTrapFlags trap);
2056
2057 // FP conditional compare.
2058 void fccmp(const VRegister& vn,
2059 const VRegister& vm,
2060 StatusFlags nzcv,
2061 Condition cond);
2062
2063 // FP signaling compare registers.
2064 void fcmpe(const VRegister& vn, const VRegister& vm);
2065
2066 // FP signaling compare immediate.
2067 void fcmpe(const VRegister& vn, double value);
2068
2069 // FP conditional signaling compare.
2070 void fccmpe(const VRegister& vn,
2071 const VRegister& vm,
2072 StatusFlags nzcv,
2073 Condition cond);
2074
2075 // FP conditional select.
2076 void fcsel(const VRegister& vd,
2077 const VRegister& vn,
2078 const VRegister& vm,
2079 Condition cond);
2080
2081 // Common FP Convert functions.
2082 void NEONFPConvertToInt(const Register& rd,
2083 const VRegister& vn,
2084 Instr op);
2085 void NEONFPConvertToInt(const VRegister& vd,
2086 const VRegister& vn,
2087 Instr op);
2088
2089 // FP convert between precisions.
2090 void fcvt(const VRegister& vd, const VRegister& vn);
2091
2092 // FP convert to higher precision.
2093 void fcvtl(const VRegister& vd, const VRegister& vn);
2094
2095 // FP convert to higher precision (second part).
2096 void fcvtl2(const VRegister& vd, const VRegister& vn);
2097
2098 // FP convert to lower precision.
2099 void fcvtn(const VRegister& vd, const VRegister& vn);
2100
2101 // FP convert to lower precision (second part).
2102 void fcvtn2(const VRegister& vd, const VRegister& vn);
2103
2104 // FP convert to lower precision, rounding to odd.
2105 void fcvtxn(const VRegister& vd, const VRegister& vn);
2106
2107 // FP convert to lower precision, rounding to odd (second part).
2108 void fcvtxn2(const VRegister& vd, const VRegister& vn);
2109
2110 // FP convert to signed integer, nearest with ties to away.
2111 void fcvtas(const Register& rd, const VRegister& vn);
2112
2113 // FP convert to unsigned integer, nearest with ties to away.
2114 void fcvtau(const Register& rd, const VRegister& vn);
2115
2116 // FP convert to signed integer, nearest with ties to away.
2117 void fcvtas(const VRegister& vd, const VRegister& vn);
2118
2119 // FP convert to unsigned integer, nearest with ties to away.
2120 void fcvtau(const VRegister& vd, const VRegister& vn);
2121
2122 // FP convert to signed integer, round towards -infinity.
2123 void fcvtms(const Register& rd, const VRegister& vn);
2124
2125 // FP convert to unsigned integer, round towards -infinity.
2126 void fcvtmu(const Register& rd, const VRegister& vn);
2127
2128 // FP convert to signed integer, round towards -infinity.
2129 void fcvtms(const VRegister& vd, const VRegister& vn);
2130
2131 // FP convert to unsigned integer, round towards -infinity.
2132 void fcvtmu(const VRegister& vd, const VRegister& vn);
2133
2134 // FP convert to signed integer, nearest with ties to even.
2135 void fcvtns(const Register& rd, const VRegister& vn);
2136
2137 // FP convert to unsigned integer, nearest with ties to even.
2138 void fcvtnu(const Register& rd, const VRegister& vn);
2139
2140 // FP convert to signed integer, nearest with ties to even.
2141 void fcvtns(const VRegister& rd, const VRegister& vn);
2142
2143 // FP convert to unsigned integer, nearest with ties to even.
2144 void fcvtnu(const VRegister& rd, const VRegister& vn);
2145
2146 // FP convert to signed integer or fixed-point, round towards zero.
2147 void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);
2148
2149 // FP convert to unsigned integer or fixed-point, round towards zero.
2150 void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);
2151
2152 // FP convert to signed integer or fixed-point, round towards zero.
2153 void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);
2154
2155 // FP convert to unsigned integer or fixed-point, round towards zero.
2156 void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);
2157
2158 // FP convert to signed integer, round towards +infinity.
2159 void fcvtps(const Register& rd, const VRegister& vn);
2160
2161 // FP convert to unsigned integer, round towards +infinity.
2162 void fcvtpu(const Register& rd, const VRegister& vn);
2163
2164 // FP convert to signed integer, round towards +infinity.
2165 void fcvtps(const VRegister& vd, const VRegister& vn);
2166
2167 // FP convert to unsigned integer, round towards +infinity.
2168 void fcvtpu(const VRegister& vd, const VRegister& vn);
2169
2170 // Convert signed integer or fixed-point to FP.
2171 void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2172
2173 // Convert unsigned integer or fixed-point to FP.
2174 void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2175
2176 // Convert signed integer or fixed-point to FP.
2177 void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2178
2179 // Convert unsigned integer or fixed-point to FP.
2180 void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2181
2182 // Unsigned absolute difference.
2183 void uabd(const VRegister& vd,
2184 const VRegister& vn,
2185 const VRegister& vm);
2186
2187 // Signed absolute difference.
2188 void sabd(const VRegister& vd,
2189 const VRegister& vn,
2190 const VRegister& vm);
2191
2192 // Unsigned absolute difference and accumulate.
2193 void uaba(const VRegister& vd,
2194 const VRegister& vn,
2195 const VRegister& vm);
2196
2197 // Signed absolute difference and accumulate.
2198 void saba(const VRegister& vd,
2199 const VRegister& vn,
2200 const VRegister& vm);
2201
2202 // Add.
2203 void add(const VRegister& vd,
2204 const VRegister& vn,
2205 const VRegister& vm);
2206
2207 // Subtract.
2208 void sub(const VRegister& vd,
2209 const VRegister& vn,
2210 const VRegister& vm);
2211
2212 // Unsigned halving add.
2213 void uhadd(const VRegister& vd,
2214 const VRegister& vn,
2215 const VRegister& vm);
2216
2217 // Signed halving add.
2218 void shadd(const VRegister& vd,
2219 const VRegister& vn,
2220 const VRegister& vm);
2221
2222 // Unsigned rounding halving add.
2223 void urhadd(const VRegister& vd,
2224 const VRegister& vn,
2225 const VRegister& vm);
2226
2227 // Signed rounding halving add.
2228 void srhadd(const VRegister& vd,
2229 const VRegister& vn,
2230 const VRegister& vm);
2231
2232 // Unsigned halving sub.
2233 void uhsub(const VRegister& vd,
2234 const VRegister& vn,
2235 const VRegister& vm);
2236
2237 // Signed halving sub.
2238 void shsub(const VRegister& vd,
2239 const VRegister& vn,
2240 const VRegister& vm);
2241
2242 // Unsigned saturating add.
2243 void uqadd(const VRegister& vd,
2244 const VRegister& vn,
2245 const VRegister& vm);
2246
2247 // Signed saturating add.
2248 void sqadd(const VRegister& vd,
2249 const VRegister& vn,
2250 const VRegister& vm);
2251
2252 // Unsigned saturating subtract.
2253 void uqsub(const VRegister& vd,
2254 const VRegister& vn,
2255 const VRegister& vm);
2256
2257 // Signed saturating subtract.
2258 void sqsub(const VRegister& vd,
2259 const VRegister& vn,
2260 const VRegister& vm);
2261
2262 // Add pairwise.
2263 void addp(const VRegister& vd,
2264 const VRegister& vn,
2265 const VRegister& vm);
2266
2267 // Add pair of elements scalar.
2268 void addp(const VRegister& vd,
2269 const VRegister& vn);
2270
2271 // Multiply-add to accumulator.
2272 void mla(const VRegister& vd,
2273 const VRegister& vn,
2274 const VRegister& vm);
2275
2276 // Multiply-subtract to accumulator.
2277 void mls(const VRegister& vd,
2278 const VRegister& vn,
2279 const VRegister& vm);
2280
2281 // Multiply.
2282 void mul(const VRegister& vd,
2283 const VRegister& vn,
2284 const VRegister& vm);
2285
2286 // Multiply by scalar element.
2287 void mul(const VRegister& vd,
2288 const VRegister& vn,
2289 const VRegister& vm,
2290 int vm_index);
2291
2292 // Multiply-add by scalar element.
2293 void mla(const VRegister& vd,
2294 const VRegister& vn,
2295 const VRegister& vm,
2296 int vm_index);
2297
2298 // Multiply-subtract by scalar element.
2299 void mls(const VRegister& vd,
2300 const VRegister& vn,
2301 const VRegister& vm,
2302 int vm_index);
2303
2304 // Signed long multiply-add by scalar element.
2305 void smlal(const VRegister& vd,
2306 const VRegister& vn,
2307 const VRegister& vm,
2308 int vm_index);
2309
2310 // Signed long multiply-add by scalar element (second part).
2311 void smlal2(const VRegister& vd,
2312 const VRegister& vn,
2313 const VRegister& vm,
2314 int vm_index);
2315
2316 // Unsigned long multiply-add by scalar element.
2317 void umlal(const VRegister& vd,
2318 const VRegister& vn,
2319 const VRegister& vm,
2320 int vm_index);
2321
2322 // Unsigned long multiply-add by scalar element (second part).
2323 void umlal2(const VRegister& vd,
2324 const VRegister& vn,
2325 const VRegister& vm,
2326 int vm_index);
2327
2328 // Signed long multiply-sub by scalar element.
2329 void smlsl(const VRegister& vd,
2330 const VRegister& vn,
2331 const VRegister& vm,
2332 int vm_index);
2333
2334 // Signed long multiply-sub by scalar element (second part).
2335 void smlsl2(const VRegister& vd,
2336 const VRegister& vn,
2337 const VRegister& vm,
2338 int vm_index);
2339
2340 // Unsigned long multiply-sub by scalar element.
2341 void umlsl(const VRegister& vd,
2342 const VRegister& vn,
2343 const VRegister& vm,
2344 int vm_index);
2345
2346 // Unsigned long multiply-sub by scalar element (second part).
2347 void umlsl2(const VRegister& vd,
2348 const VRegister& vn,
2349 const VRegister& vm,
2350 int vm_index);
2351
2352 // Signed long multiply by scalar element.
2353 void smull(const VRegister& vd,
2354 const VRegister& vn,
2355 const VRegister& vm,
2356 int vm_index);
2357
2358 // Signed long multiply by scalar element (second part).
2359 void smull2(const VRegister& vd,
2360 const VRegister& vn,
2361 const VRegister& vm,
2362 int vm_index);
2363
2364 // Unsigned long multiply by scalar element.
2365 void umull(const VRegister& vd,
2366 const VRegister& vn,
2367 const VRegister& vm,
2368 int vm_index);
2369
2370 // Unsigned long multiply by scalar element (second part).
2371 void umull2(const VRegister& vd,
2372 const VRegister& vn,
2373 const VRegister& vm,
2374 int vm_index);
2375
2376 // Signed saturating doubling long multiply by element.
2377 void sqdmull(const VRegister& vd,
2378 const VRegister& vn,
2379 const VRegister& vm,
2380 int vm_index);
2381
2382 // Signed saturating doubling long multiply by element (second part).
2383 void sqdmull2(const VRegister& vd,
2384 const VRegister& vn,
2385 const VRegister& vm,
2386 int vm_index);
2387
2388 // Signed saturating doubling long multiply-add by element.
2389 void sqdmlal(const VRegister& vd,
2390 const VRegister& vn,
2391 const VRegister& vm,
2392 int vm_index);
2393
2394 // Signed saturating doubling long multiply-add by element (second part).
2395 void sqdmlal2(const VRegister& vd,
2396 const VRegister& vn,
2397 const VRegister& vm,
2398 int vm_index);
2399
2400 // Signed saturating doubling long multiply-sub by element.
2401 void sqdmlsl(const VRegister& vd,
2402 const VRegister& vn,
2403 const VRegister& vm,
2404 int vm_index);
2405
2406 // Signed saturating doubling long multiply-sub by element (second part).
2407 void sqdmlsl2(const VRegister& vd,
2408 const VRegister& vn,
2409 const VRegister& vm,
2410 int vm_index);
2411
2412 // Compare equal.
2413 void cmeq(const VRegister& vd,
2414 const VRegister& vn,
2415 const VRegister& vm);
2416
2417 // Compare signed greater than or equal.
2418 void cmge(const VRegister& vd,
2419 const VRegister& vn,
2420 const VRegister& vm);
2421
2422 // Compare signed greater than.
2423 void cmgt(const VRegister& vd,
2424 const VRegister& vn,
2425 const VRegister& vm);
2426
2427 // Compare unsigned higher.
2428 void cmhi(const VRegister& vd,
2429 const VRegister& vn,
2430 const VRegister& vm);
2431
2432 // Compare unsigned higher or same.
2433 void cmhs(const VRegister& vd,
2434 const VRegister& vn,
2435 const VRegister& vm);
2436
2437 // Compare bitwise test bits nonzero.
2438 void cmtst(const VRegister& vd,
2439 const VRegister& vn,
2440 const VRegister& vm);
2441
2442 // Compare bitwise to zero.
2443 void cmeq(const VRegister& vd,
2444 const VRegister& vn,
2445 int value);
2446
2447 // Compare signed greater than or equal to zero.
2448 void cmge(const VRegister& vd,
2449 const VRegister& vn,
2450 int value);
2451
2452 // Compare signed greater than zero.
2453 void cmgt(const VRegister& vd,
2454 const VRegister& vn,
2455 int value);
2456
2457 // Compare signed less than or equal to zero.
2458 void cmle(const VRegister& vd,
2459 const VRegister& vn,
2460 int value);
2461
2462 // Compare signed less than zero.
2463 void cmlt(const VRegister& vd,
2464 const VRegister& vn,
2465 int value);
2466
2467 // Signed shift left by register.
2468 void sshl(const VRegister& vd,
2469 const VRegister& vn,
2470 const VRegister& vm);
2471
2472 // Unsigned shift left by register.
2473 void ushl(const VRegister& vd,
2474 const VRegister& vn,
2475 const VRegister& vm);
2476
2477 // Signed saturating shift left by register.
2478 void sqshl(const VRegister& vd,
2479 const VRegister& vn,
2480 const VRegister& vm);
2481
2482 // Unsigned saturating shift left by register.
2483 void uqshl(const VRegister& vd,
2484 const VRegister& vn,
2485 const VRegister& vm);
2486
2487 // Signed rounding shift left by register.
2488 void srshl(const VRegister& vd,
2489 const VRegister& vn,
2490 const VRegister& vm);
2491
2492 // Unsigned rounding shift left by register.
2493 void urshl(const VRegister& vd,
2494 const VRegister& vn,
2495 const VRegister& vm);
2496
2497 // Signed saturating rounding shift left by register.
2498 void sqrshl(const VRegister& vd,
2499 const VRegister& vn,
2500 const VRegister& vm);
2501
2502 // Unsigned saturating rounding shift left by register.
2503 void uqrshl(const VRegister& vd,
2504 const VRegister& vn,
2505 const VRegister& vm);
2506
2507 // Bitwise and.
2508 void and_(const VRegister& vd,
2509 const VRegister& vn,
2510 const VRegister& vm);
2511
2512 // Bitwise or.
2513 void orr(const VRegister& vd,
2514 const VRegister& vn,
2515 const VRegister& vm);
2516
2517 // Bitwise or immediate.
2518 void orr(const VRegister& vd,
2519 const int imm8,
2520 const int left_shift = 0);
2521
2522 // Move register to register.
2523 void mov(const VRegister& vd,
2524 const VRegister& vn);
2525
2526 // Bitwise orn.
2527 void orn(const VRegister& vd,
2528 const VRegister& vn,
2529 const VRegister& vm);
2530
2531 // Bitwise eor.
2532 void eor(const VRegister& vd,
2533 const VRegister& vn,
2534 const VRegister& vm);
2535
2536 // Bit clear immediate.
2537 void bic(const VRegister& vd,
2538 const int imm8,
2539 const int left_shift = 0);
2540
2541 // Bit clear.
2542 void bic(const VRegister& vd,
2543 const VRegister& vn,
2544 const VRegister& vm);
2545
2546 // Bitwise insert if false.
2547 void bif(const VRegister& vd,
2548 const VRegister& vn,
2549 const VRegister& vm);
2550
2551 // Bitwise insert if true.
2552 void bit(const VRegister& vd,
2553 const VRegister& vn,
2554 const VRegister& vm);
2555
2556 // Bitwise select.
2557 void bsl(const VRegister& vd,
2558 const VRegister& vn,
2559 const VRegister& vm);
2560
2561 // Polynomial multiply.
2562 void pmul(const VRegister& vd,
2563 const VRegister& vn,
2564 const VRegister& vm);
2565
2566 // Vector move immediate.
2567 void movi(const VRegister& vd,
2568 const uint64_t imm,
2569 Shift shift = LSL,
2570 const int shift_amount = 0);
2571
2572 // Bitwise not.
2573 void mvn(const VRegister& vd,
2574 const VRegister& vn);
2575
2576 // Vector move inverted immediate.
2577 void mvni(const VRegister& vd,
2578 const int imm8,
2579 Shift shift = LSL,
2580 const int shift_amount = 0);
2581
2582 // Signed saturating accumulate of unsigned value.
2583 void suqadd(const VRegister& vd,
2584 const VRegister& vn);
2585
2586 // Unsigned saturating accumulate of signed value.
2587 void usqadd(const VRegister& vd,
2588 const VRegister& vn);
2589
2590 // Absolute value.
2591 void abs(const VRegister& vd,
2592 const VRegister& vn);
2593
2594 // Signed saturating absolute value.
2595 void sqabs(const VRegister& vd,
2596 const VRegister& vn);
2597
2598 // Negate.
2599 void neg(const VRegister& vd,
2600 const VRegister& vn);
2601
2602 // Signed saturating negate.
2603 void sqneg(const VRegister& vd,
2604 const VRegister& vn);
2605
2606 // Bitwise not.
2607 void not_(const VRegister& vd,
2608 const VRegister& vn);
2609
2610 // Extract narrow.
2611 void xtn(const VRegister& vd,
2612 const VRegister& vn);
2613
2614 // Extract narrow (second part).
2615 void xtn2(const VRegister& vd,
2616 const VRegister& vn);
2617
2618 // Signed saturating extract narrow.
2619 void sqxtn(const VRegister& vd,
2620 const VRegister& vn);
2621
2622 // Signed saturating extract narrow (second part).
2623 void sqxtn2(const VRegister& vd,
2624 const VRegister& vn);
2625
2626 // Unsigned saturating extract narrow.
2627 void uqxtn(const VRegister& vd,
2628 const VRegister& vn);
2629
2630 // Unsigned saturating extract narrow (second part).
2631 void uqxtn2(const VRegister& vd,
2632 const VRegister& vn);
2633
2634 // Signed saturating extract unsigned narrow.
2635 void sqxtun(const VRegister& vd,
2636 const VRegister& vn);
2637
2638 // Signed saturating extract unsigned narrow (second part).
2639 void sqxtun2(const VRegister& vd,
2640 const VRegister& vn);
2641
2642 // Extract vector from pair of vectors.
2643 void ext(const VRegister& vd,
2644 const VRegister& vn,
2645 const VRegister& vm,
2646 int index);
2647
2648 // Duplicate vector element to vector or scalar.
2649 void dup(const VRegister& vd,
2650 const VRegister& vn,
2651 int vn_index);
2652
2653 // Move vector element to scalar.
2654 void mov(const VRegister& vd,
2655 const VRegister& vn,
2656 int vn_index);
2657
2658 // Duplicate general-purpose register to vector.
2659 void dup(const VRegister& vd,
2660 const Register& rn);
2661
2662 // Insert vector element from another vector element.
2663 void ins(const VRegister& vd,
2664 int vd_index,
2665 const VRegister& vn,
2666 int vn_index);
2667
2668 // Move vector element to another vector element.
2669 void mov(const VRegister& vd,
2670 int vd_index,
2671 const VRegister& vn,
2672 int vn_index);
2673
2674 // Insert vector element from general-purpose register.
2675 void ins(const VRegister& vd,
2676 int vd_index,
2677 const Register& rn);
2678
2679 // Move general-purpose register to a vector element.
2680 void mov(const VRegister& vd,
2681 int vd_index,
2682 const Register& rn);
2683
2684 // Unsigned move vector element to general-purpose register.
2685 void umov(const Register& rd,
2686 const VRegister& vn,
2687 int vn_index);
2688
2689 // Move vector element to general-purpose register.
2690 void mov(const Register& rd,
2691 const VRegister& vn,
2692 int vn_index);
2693
2694 // Signed move vector element to general-purpose register.
2695 void smov(const Register& rd,
2696 const VRegister& vn,
2697 int vn_index);
2698
2699 // One-element structure load to one register.
2700 void ld1(const VRegister& vt,
2701 const MemOperand& src);
2702
2703 // One-element structure load to two registers.
2704 void ld1(const VRegister& vt,
2705 const VRegister& vt2,
2706 const MemOperand& src);
2707
2708 // One-element structure load to three registers.
2709 void ld1(const VRegister& vt,
2710 const VRegister& vt2,
2711 const VRegister& vt3,
2712 const MemOperand& src);
2713
2714 // One-element structure load to four registers.
2715 void ld1(const VRegister& vt,
2716 const VRegister& vt2,
2717 const VRegister& vt3,
2718 const VRegister& vt4,
2719 const MemOperand& src);
2720
2721 // One-element single structure load to one lane.
2722 void ld1(const VRegister& vt,
2723 int lane,
2724 const MemOperand& src);
2725
2726 // One-element single structure load to all lanes.
2727 void ld1r(const VRegister& vt,
2728 const MemOperand& src);
2729
2730 // Two-element structure load.
2731 void ld2(const VRegister& vt,
2732 const VRegister& vt2,
2733 const MemOperand& src);
2734
2735 // Two-element single structure load to one lane.
2736 void ld2(const VRegister& vt,
2737 const VRegister& vt2,
2738 int lane,
2739 const MemOperand& src);
2740
2741 // Two-element single structure load to all lanes.
2742 void ld2r(const VRegister& vt,
2743 const VRegister& vt2,
2744 const MemOperand& src);
2745
2746 // Three-element structure load.
2747 void ld3(const VRegister& vt,
2748 const VRegister& vt2,
2749 const VRegister& vt3,
2750 const MemOperand& src);
2751
2752 // Three-element single structure load to one lane.
2753 void ld3(const VRegister& vt,
2754 const VRegister& vt2,
2755 const VRegister& vt3,
2756 int lane,
2757 const MemOperand& src);
2758
2759 // Three-element single structure load to all lanes.
2760 void ld3r(const VRegister& vt,
2761 const VRegister& vt2,
2762 const VRegister& vt3,
2763 const MemOperand& src);
2764
2765 // Four-element structure load.
2766 void ld4(const VRegister& vt,
2767 const VRegister& vt2,
2768 const VRegister& vt3,
2769 const VRegister& vt4,
2770 const MemOperand& src);
2771
2772 // Four-element single structure load to one lane.
2773 void ld4(const VRegister& vt,
2774 const VRegister& vt2,
2775 const VRegister& vt3,
2776 const VRegister& vt4,
2777 int lane,
2778 const MemOperand& src);
2779
2780 // Four-element single structure load to all lanes.
2781 void ld4r(const VRegister& vt,
2782 const VRegister& vt2,
2783 const VRegister& vt3,
2784 const VRegister& vt4,
2785 const MemOperand& src);
2786
2787 // Count leading sign bits.
2788 void cls(const VRegister& vd,
2789 const VRegister& vn);
2790
2791 // Count leading zero bits (vector).
2792 void clz(const VRegister& vd,
2793 const VRegister& vn);
2794
2795 // Population count per byte.
2796 void cnt(const VRegister& vd,
2797 const VRegister& vn);
2798
2799 // Reverse bit order.
2800 void rbit(const VRegister& vd,
2801 const VRegister& vn);
2802
2803 // Reverse elements in 16-bit halfwords.
2804 void rev16(const VRegister& vd,
2805 const VRegister& vn);
2806
2807 // Reverse elements in 32-bit words.
2808 void rev32(const VRegister& vd,
2809 const VRegister& vn);
2810
2811 // Reverse elements in 64-bit doublewords.
2812 void rev64(const VRegister& vd,
2813 const VRegister& vn);
2814
2815 // Unsigned reciprocal square root estimate.
2816 void ursqrte(const VRegister& vd,
2817 const VRegister& vn);
2818
2819 // Unsigned reciprocal estimate.
2820 void urecpe(const VRegister& vd,
2821 const VRegister& vn);
2822
2823 // Signed pairwise long add.
2824 void saddlp(const VRegister& vd,
2825 const VRegister& vn);
2826
2827 // Unsigned pairwise long add.
2828 void uaddlp(const VRegister& vd,
2829 const VRegister& vn);
2830
2831 // Signed pairwise long add and accumulate.
2832 void sadalp(const VRegister& vd,
2833 const VRegister& vn);
2834
2835 // Unsigned pairwise long add and accumulate.
2836 void uadalp(const VRegister& vd,
2837 const VRegister& vn);
2838
2839 // Shift left by immediate.
2840 void shl(const VRegister& vd,
2841 const VRegister& vn,
2842 int shift);
2843
2844 // Signed saturating shift left by immediate.
2845 void sqshl(const VRegister& vd,
2846 const VRegister& vn,
2847 int shift);
2848
2849 // Signed saturating shift left unsigned by immediate.
2850 void sqshlu(const VRegister& vd,
2851 const VRegister& vn,
2852 int shift);
2853
2854 // Unsigned saturating shift left by immediate.
2855 void uqshl(const VRegister& vd,
2856 const VRegister& vn,
2857 int shift);
2858
2859 // Signed shift left long by immediate.
2860 void sshll(const VRegister& vd,
2861 const VRegister& vn,
2862 int shift);
2863
2864 // Signed shift left long by immediate (second part).
2865 void sshll2(const VRegister& vd,
2866 const VRegister& vn,
2867 int shift);
2868
2869 // Signed extend long.
2870 void sxtl(const VRegister& vd,
2871 const VRegister& vn);
2872
2873 // Signed extend long (second part).
2874 void sxtl2(const VRegister& vd,
2875 const VRegister& vn);
2876
2877 // Unsigned shift left long by immediate.
2878 void ushll(const VRegister& vd,
2879 const VRegister& vn,
2880 int shift);
2881
2882 // Unsigned shift left long by immediate (second part).
2883 void ushll2(const VRegister& vd,
2884 const VRegister& vn,
2885 int shift);
2886
2887 // Shift left long by element size.
2888 void shll(const VRegister& vd,
2889 const VRegister& vn,
2890 int shift);
2891
2892 // Shift left long by element size (second part).
2893 void shll2(const VRegister& vd,
2894 const VRegister& vn,
2895 int shift);
2896
2897 // Unsigned extend long.
2898 void uxtl(const VRegister& vd,
2899 const VRegister& vn);
2900
2901 // Unsigned extend long (second part).
2902 void uxtl2(const VRegister& vd,
2903 const VRegister& vn);
2904
2905 // Shift left by immediate and insert.
2906 void sli(const VRegister& vd,
2907 const VRegister& vn,
2908 int shift);
2909
2910 // Shift right by immediate and insert.
2911 void sri(const VRegister& vd,
2912 const VRegister& vn,
2913 int shift);
2914
2915 // Signed maximum.
2916 void smax(const VRegister& vd,
2917 const VRegister& vn,
2918 const VRegister& vm);
2919
2920 // Signed pairwise maximum.
2921 void smaxp(const VRegister& vd,
2922 const VRegister& vn,
2923 const VRegister& vm);
2924
2925 // Add across vector.
2926 void addv(const VRegister& vd,
2927 const VRegister& vn);
2928
2929 // Signed add long across vector.
2930 void saddlv(const VRegister& vd,
2931 const VRegister& vn);
2932
2933 // Unsigned add long across vector.
2934 void uaddlv(const VRegister& vd,
2935 const VRegister& vn);
2936
2937 // FP maximum number across vector.
2938 void fmaxnmv(const VRegister& vd,
2939 const VRegister& vn);
2940
2941 // FP maximum across vector.
2942 void fmaxv(const VRegister& vd,
2943 const VRegister& vn);
2944
2945 // FP minimum number across vector.
2946 void fminnmv(const VRegister& vd,
2947 const VRegister& vn);
2948
2949 // FP minimum across vector.
2950 void fminv(const VRegister& vd,
2951 const VRegister& vn);
2952
2953 // Signed maximum across vector.
2954 void smaxv(const VRegister& vd,
2955 const VRegister& vn);
2956
2957 // Signed minimum.
2958 void smin(const VRegister& vd,
2959 const VRegister& vn,
2960 const VRegister& vm);
2961
2962 // Signed pairwise minimum.
2963 void sminp(const VRegister& vd,
2964 const VRegister& vn,
2965 const VRegister& vm);
2966
2967 // Signed minimum across vector.
2968 void sminv(const VRegister& vd,
2969 const VRegister& vn);
2970
2971 // One-element structure store from one register.
2972 void st1(const VRegister& vt,
2973 const MemOperand& src);
2974
2975 // One-element structure store from two registers.
2976 void st1(const VRegister& vt,
2977 const VRegister& vt2,
2978 const MemOperand& src);
2979
2980 // One-element structure store from three registers.
2981 void st1(const VRegister& vt,
2982 const VRegister& vt2,
2983 const VRegister& vt3,
2984 const MemOperand& src);
2985
2986 // One-element structure store from four registers.
2987 void st1(const VRegister& vt,
2988 const VRegister& vt2,
2989 const VRegister& vt3,
2990 const VRegister& vt4,
2991 const MemOperand& src);
2992
2993 // One-element single structure store from one lane.
2994 void st1(const VRegister& vt,
2995 int lane,
2996 const MemOperand& src);
2997
2998 // Two-element structure store from two registers.
2999 void st2(const VRegister& vt,
3000 const VRegister& vt2,
3001 const MemOperand& src);
3002
3003 // Two-element single structure store from two lanes.
3004 void st2(const VRegister& vt,
3005 const VRegister& vt2,
3006 int lane,
3007 const MemOperand& src);
3008
3009 // Three-element structure store from three registers.
3010 void st3(const VRegister& vt,
3011 const VRegister& vt2,
3012 const VRegister& vt3,
3013 const MemOperand& src);
3014
3015 // Three-element single structure store from three lanes.
3016 void st3(const VRegister& vt,
3017 const VRegister& vt2,
3018 const VRegister& vt3,
3019 int lane,
3020 const MemOperand& src);
3021
3022 // Four-element structure store from four registers.
3023 void st4(const VRegister& vt,
3024 const VRegister& vt2,
3025 const VRegister& vt3,
3026 const VRegister& vt4,
3027 const MemOperand& src);
3028
3029 // Four-element single structure store from four lanes.
3030 void st4(const VRegister& vt,
3031 const VRegister& vt2,
3032 const VRegister& vt3,
3033 const VRegister& vt4,
3034 int lane,
3035 const MemOperand& src);
3036
3037 // Unsigned add long.
3038 void uaddl(const VRegister& vd,
3039 const VRegister& vn,
3040 const VRegister& vm);
3041
3042 // Unsigned add long (second part).
3043 void uaddl2(const VRegister& vd,
3044 const VRegister& vn,
3045 const VRegister& vm);
3046
3047 // Unsigned add wide.
3048 void uaddw(const VRegister& vd,
3049 const VRegister& vn,
3050 const VRegister& vm);
3051
3052 // Unsigned add wide (second part).
3053 void uaddw2(const VRegister& vd,
3054 const VRegister& vn,
3055 const VRegister& vm);
3056
3057 // Signed add long.
3058 void saddl(const VRegister& vd,
3059 const VRegister& vn,
3060 const VRegister& vm);
3061
3062 // Signed add long (second part).
3063 void saddl2(const VRegister& vd,
3064 const VRegister& vn,
3065 const VRegister& vm);
3066
3067 // Signed add wide.
3068 void saddw(const VRegister& vd,
3069 const VRegister& vn,
3070 const VRegister& vm);
3071
3072 // Signed add wide (second part).
3073 void saddw2(const VRegister& vd,
3074 const VRegister& vn,
3075 const VRegister& vm);
3076
3077 // Unsigned subtract long.
3078 void usubl(const VRegister& vd,
3079 const VRegister& vn,
3080 const VRegister& vm);
3081
3082 // Unsigned subtract long (second part).
3083 void usubl2(const VRegister& vd,
3084 const VRegister& vn,
3085 const VRegister& vm);
3086
3087 // Unsigned subtract wide.
3088 void usubw(const VRegister& vd,
3089 const VRegister& vn,
3090 const VRegister& vm);
3091
3092 // Unsigned subtract wide (second part).
3093 void usubw2(const VRegister& vd,
3094 const VRegister& vn,
3095 const VRegister& vm);
3096
3097 // Signed subtract long.
3098 void ssubl(const VRegister& vd,
3099 const VRegister& vn,
3100 const VRegister& vm);
3101
3102 // Signed subtract long (second part).
3103 void ssubl2(const VRegister& vd,
3104 const VRegister& vn,
3105 const VRegister& vm);
3106
// Signed subtract wide.
3108 void ssubw(const VRegister& vd,
3109 const VRegister& vn,
3110 const VRegister& vm);
3111
// Signed subtract wide (second part).
3113 void ssubw2(const VRegister& vd,
3114 const VRegister& vn,
3115 const VRegister& vm);
3116
3117 // Unsigned maximum.
3118 void umax(const VRegister& vd,
3119 const VRegister& vn,
3120 const VRegister& vm);
3121
3122 // Unsigned pairwise maximum.
3123 void umaxp(const VRegister& vd,
3124 const VRegister& vn,
3125 const VRegister& vm);
3126
3127 // Unsigned maximum across vector.
3128 void umaxv(const VRegister& vd,
3129 const VRegister& vn);
3130
3131 // Unsigned minimum.
3132 void umin(const VRegister& vd,
3133 const VRegister& vn,
3134 const VRegister& vm);
3135
3136 // Unsigned pairwise minimum.
3137 void uminp(const VRegister& vd,
3138 const VRegister& vn,
3139 const VRegister& vm);
3140
3141 // Unsigned minimum across vector.
3142 void uminv(const VRegister& vd,
3143 const VRegister& vn);
3144
3145 // Transpose vectors (primary).
3146 void trn1(const VRegister& vd,
3147 const VRegister& vn,
3148 const VRegister& vm);
3149
3150 // Transpose vectors (secondary).
3151 void trn2(const VRegister& vd,
3152 const VRegister& vn,
3153 const VRegister& vm);
3154
3155 // Unzip vectors (primary).
3156 void uzp1(const VRegister& vd,
3157 const VRegister& vn,
3158 const VRegister& vm);
3159
3160 // Unzip vectors (secondary).
3161 void uzp2(const VRegister& vd,
3162 const VRegister& vn,
3163 const VRegister& vm);
3164
3165 // Zip vectors (primary).
3166 void zip1(const VRegister& vd,
3167 const VRegister& vn,
3168 const VRegister& vm);
3169
3170 // Zip vectors (secondary).
3171 void zip2(const VRegister& vd,
3172 const VRegister& vn,
3173 const VRegister& vm);
3174
3175 // Signed shift right by immediate.
3176 void sshr(const VRegister& vd,
3177 const VRegister& vn,
3178 int shift);
3179
3180 // Unsigned shift right by immediate.
3181 void ushr(const VRegister& vd,
3182 const VRegister& vn,
3183 int shift);
3184
3185 // Signed rounding shift right by immediate.
3186 void srshr(const VRegister& vd,
3187 const VRegister& vn,
3188 int shift);
3189
3190 // Unsigned rounding shift right by immediate.
3191 void urshr(const VRegister& vd,
3192 const VRegister& vn,
3193 int shift);
3194
3195 // Signed shift right by immediate and accumulate.
3196 void ssra(const VRegister& vd,
3197 const VRegister& vn,
3198 int shift);
3199
3200 // Unsigned shift right by immediate and accumulate.
3201 void usra(const VRegister& vd,
3202 const VRegister& vn,
3203 int shift);
3204
3205 // Signed rounding shift right by immediate and accumulate.
3206 void srsra(const VRegister& vd,
3207 const VRegister& vn,
3208 int shift);
3209
3210 // Unsigned rounding shift right by immediate and accumulate.
3211 void ursra(const VRegister& vd,
3212 const VRegister& vn,
3213 int shift);
3214
3215 // Shift right narrow by immediate.
3216 void shrn(const VRegister& vd,
3217 const VRegister& vn,
3218 int shift);
3219
3220 // Shift right narrow by immediate (second part).
3221 void shrn2(const VRegister& vd,
3222 const VRegister& vn,
3223 int shift);
3224
3225 // Rounding shift right narrow by immediate.
3226 void rshrn(const VRegister& vd,
3227 const VRegister& vn,
3228 int shift);
3229
3230 // Rounding shift right narrow by immediate (second part).
3231 void rshrn2(const VRegister& vd,
3232 const VRegister& vn,
3233 int shift);
3234
3235 // Unsigned saturating shift right narrow by immediate.
3236 void uqshrn(const VRegister& vd,
3237 const VRegister& vn,
3238 int shift);
3239
3240 // Unsigned saturating shift right narrow by immediate (second part).
3241 void uqshrn2(const VRegister& vd,
3242 const VRegister& vn,
3243 int shift);
3244
3245 // Unsigned saturating rounding shift right narrow by immediate.
3246 void uqrshrn(const VRegister& vd,
3247 const VRegister& vn,
3248 int shift);
3249
3250 // Unsigned saturating rounding shift right narrow by immediate (second part).
3251 void uqrshrn2(const VRegister& vd,
3252 const VRegister& vn,
3253 int shift);
3254
3255 // Signed saturating shift right narrow by immediate.
3256 void sqshrn(const VRegister& vd,
3257 const VRegister& vn,
3258 int shift);
3259
3260 // Signed saturating shift right narrow by immediate (second part).
3261 void sqshrn2(const VRegister& vd,
3262 const VRegister& vn,
3263 int shift);
3264
3265 // Signed saturating rounded shift right narrow by immediate.
3266 void sqrshrn(const VRegister& vd,
3267 const VRegister& vn,
3268 int shift);
3269
3270 // Signed saturating rounded shift right narrow by immediate (second part).
3271 void sqrshrn2(const VRegister& vd,
3272 const VRegister& vn,
3273 int shift);
3274
3275 // Signed saturating shift right unsigned narrow by immediate.
3276 void sqshrun(const VRegister& vd,
3277 const VRegister& vn,
3278 int shift);
3279
3280 // Signed saturating shift right unsigned narrow by immediate (second part).
3281 void sqshrun2(const VRegister& vd,
3282 const VRegister& vn,
3283 int shift);
3284
3285 // Signed sat rounded shift right unsigned narrow by immediate.
3286 void sqrshrun(const VRegister& vd,
3287 const VRegister& vn,
3288 int shift);
3289
3290 // Signed sat rounded shift right unsigned narrow by immediate (second part).
3291 void sqrshrun2(const VRegister& vd,
3292 const VRegister& vn,
3293 int shift);
3294
3295 // FP reciprocal step.
3296 void frecps(const VRegister& vd,
3297 const VRegister& vn,
3298 const VRegister& vm);
3299
3300 // FP reciprocal estimate.
3301 void frecpe(const VRegister& vd,
3302 const VRegister& vn);
3303
3304 // FP reciprocal square root estimate.
3305 void frsqrte(const VRegister& vd,
3306 const VRegister& vn);
3307
3308 // FP reciprocal square root step.
3309 void frsqrts(const VRegister& vd,
3310 const VRegister& vn,
3311 const VRegister& vm);
3312
3313 // Signed absolute difference and accumulate long.
3314 void sabal(const VRegister& vd,
3315 const VRegister& vn,
3316 const VRegister& vm);
3317
3318 // Signed absolute difference and accumulate long (second part).
3319 void sabal2(const VRegister& vd,
3320 const VRegister& vn,
3321 const VRegister& vm);
3322
3323 // Unsigned absolute difference and accumulate long.
3324 void uabal(const VRegister& vd,
3325 const VRegister& vn,
3326 const VRegister& vm);
3327
3328 // Unsigned absolute difference and accumulate long (second part).
3329 void uabal2(const VRegister& vd,
3330 const VRegister& vn,
3331 const VRegister& vm);
3332
3333 // Signed absolute difference long.
3334 void sabdl(const VRegister& vd,
3335 const VRegister& vn,
3336 const VRegister& vm);
3337
3338 // Signed absolute difference long (second part).
3339 void sabdl2(const VRegister& vd,
3340 const VRegister& vn,
3341 const VRegister& vm);
3342
3343 // Unsigned absolute difference long.
3344 void uabdl(const VRegister& vd,
3345 const VRegister& vn,
3346 const VRegister& vm);
3347
3348 // Unsigned absolute difference long (second part).
3349 void uabdl2(const VRegister& vd,
3350 const VRegister& vn,
3351 const VRegister& vm);
3352
3353 // Polynomial multiply long.
3354 void pmull(const VRegister& vd,
3355 const VRegister& vn,
3356 const VRegister& vm);
3357
3358 // Polynomial multiply long (second part).
3359 void pmull2(const VRegister& vd,
3360 const VRegister& vn,
3361 const VRegister& vm);
3362
3363 // Signed long multiply-add.
3364 void smlal(const VRegister& vd,
3365 const VRegister& vn,
3366 const VRegister& vm);
3367
3368 // Signed long multiply-add (second part).
3369 void smlal2(const VRegister& vd,
3370 const VRegister& vn,
3371 const VRegister& vm);
3372
3373 // Unsigned long multiply-add.
3374 void umlal(const VRegister& vd,
3375 const VRegister& vn,
3376 const VRegister& vm);
3377
3378 // Unsigned long multiply-add (second part).
3379 void umlal2(const VRegister& vd,
3380 const VRegister& vn,
3381 const VRegister& vm);
3382
3383 // Signed long multiply-sub.
3384 void smlsl(const VRegister& vd,
3385 const VRegister& vn,
3386 const VRegister& vm);
3387
3388 // Signed long multiply-sub (second part).
3389 void smlsl2(const VRegister& vd,
3390 const VRegister& vn,
3391 const VRegister& vm);
3392
3393 // Unsigned long multiply-sub.
3394 void umlsl(const VRegister& vd,
3395 const VRegister& vn,
3396 const VRegister& vm);
3397
3398 // Unsigned long multiply-sub (second part).
3399 void umlsl2(const VRegister& vd,
3400 const VRegister& vn,
3401 const VRegister& vm);
3402
3403 // Signed long multiply.
3404 void smull(const VRegister& vd,
3405 const VRegister& vn,
3406 const VRegister& vm);
3407
3408 // Signed long multiply (second part).
3409 void smull2(const VRegister& vd,
3410 const VRegister& vn,
3411 const VRegister& vm);
3412
3413 // Signed saturating doubling long multiply-add.
3414 void sqdmlal(const VRegister& vd,
3415 const VRegister& vn,
3416 const VRegister& vm);
3417
3418 // Signed saturating doubling long multiply-add (second part).
3419 void sqdmlal2(const VRegister& vd,
3420 const VRegister& vn,
3421 const VRegister& vm);
3422
3423 // Signed saturating doubling long multiply-subtract.
3424 void sqdmlsl(const VRegister& vd,
3425 const VRegister& vn,
3426 const VRegister& vm);
3427
3428 // Signed saturating doubling long multiply-subtract (second part).
3429 void sqdmlsl2(const VRegister& vd,
3430 const VRegister& vn,
3431 const VRegister& vm);
3432
3433 // Signed saturating doubling long multiply.
3434 void sqdmull(const VRegister& vd,
3435 const VRegister& vn,
3436 const VRegister& vm);
3437
3438 // Signed saturating doubling long multiply (second part).
3439 void sqdmull2(const VRegister& vd,
3440 const VRegister& vn,
3441 const VRegister& vm);
3442
3443 // Signed saturating doubling multiply returning high half.
3444 void sqdmulh(const VRegister& vd,
3445 const VRegister& vn,
3446 const VRegister& vm);
3447
3448 // Signed saturating rounding doubling multiply returning high half.
3449 void sqrdmulh(const VRegister& vd,
3450 const VRegister& vn,
3451 const VRegister& vm);
3452
3453 // Signed saturating doubling multiply element returning high half.
3454 void sqdmulh(const VRegister& vd,
3455 const VRegister& vn,
3456 const VRegister& vm,
3457 int vm_index);
3458
3459 // Signed saturating rounding doubling multiply element returning high half.
3460 void sqrdmulh(const VRegister& vd,
3461 const VRegister& vn,
3462 const VRegister& vm,
3463 int vm_index);
3464
// Unsigned long multiply.
3466 void umull(const VRegister& vd,
3467 const VRegister& vn,
3468 const VRegister& vm);
3469
3470 // Unsigned long multiply (second part).
3471 void umull2(const VRegister& vd,
3472 const VRegister& vn,
3473 const VRegister& vm);
3474
3475 // Add narrow returning high half.
3476 void addhn(const VRegister& vd,
3477 const VRegister& vn,
3478 const VRegister& vm);
3479
3480 // Add narrow returning high half (second part).
3481 void addhn2(const VRegister& vd,
3482 const VRegister& vn,
3483 const VRegister& vm);
3484
3485 // Rounding add narrow returning high half.
3486 void raddhn(const VRegister& vd,
3487 const VRegister& vn,
3488 const VRegister& vm);
3489
3490 // Rounding add narrow returning high half (second part).
3491 void raddhn2(const VRegister& vd,
3492 const VRegister& vn,
3493 const VRegister& vm);
3494
3495 // Subtract narrow returning high half.
3496 void subhn(const VRegister& vd,
3497 const VRegister& vn,
3498 const VRegister& vm);
3499
3500 // Subtract narrow returning high half (second part).
3501 void subhn2(const VRegister& vd,
3502 const VRegister& vn,
3503 const VRegister& vm);
3504
3505 // Rounding subtract narrow returning high half.
3506 void rsubhn(const VRegister& vd,
3507 const VRegister& vn,
3508 const VRegister& vm);
3509
3510 // Rounding subtract narrow returning high half (second part).
3511 void rsubhn2(const VRegister& vd,
3512 const VRegister& vn,
3513 const VRegister& vm);
3514
3515 // FP vector multiply accumulate.
3516 void fmla(const VRegister& vd,
3517 const VRegister& vn,
3518 const VRegister& vm);
3519
3520 // FP vector multiply subtract.
3521 void fmls(const VRegister& vd,
3522 const VRegister& vn,
3523 const VRegister& vm);
3524
3525 // FP vector multiply extended.
3526 void fmulx(const VRegister& vd,
3527 const VRegister& vn,
3528 const VRegister& vm);
3529
3530 // FP absolute greater than or equal.
3531 void facge(const VRegister& vd,
3532 const VRegister& vn,
3533 const VRegister& vm);
3534
3535 // FP absolute greater than.
3536 void facgt(const VRegister& vd,
3537 const VRegister& vn,
3538 const VRegister& vm);
3539
3540 // FP multiply by element.
3541 void fmul(const VRegister& vd,
3542 const VRegister& vn,
3543 const VRegister& vm,
3544 int vm_index);
3545
3546 // FP fused multiply-add to accumulator by element.
3547 void fmla(const VRegister& vd,
3548 const VRegister& vn,
3549 const VRegister& vm,
3550 int vm_index);
3551
3552 // FP fused multiply-sub from accumulator by element.
3553 void fmls(const VRegister& vd,
3554 const VRegister& vn,
3555 const VRegister& vm,
3556 int vm_index);
3557
3558 // FP multiply extended by element.
3559 void fmulx(const VRegister& vd,
3560 const VRegister& vn,
3561 const VRegister& vm,
3562 int vm_index);
3563
3564 // FP compare equal.
3565 void fcmeq(const VRegister& vd,
3566 const VRegister& vn,
3567 const VRegister& vm);
3568
3569 // FP greater than.
3570 void fcmgt(const VRegister& vd,
3571 const VRegister& vn,
3572 const VRegister& vm);
3573
3574 // FP greater than or equal.
3575 void fcmge(const VRegister& vd,
3576 const VRegister& vn,
3577 const VRegister& vm);
3578
3579 // FP compare equal to zero.
3580 void fcmeq(const VRegister& vd,
3581 const VRegister& vn,
3582 double imm);
3583
3584 // FP greater than zero.
3585 void fcmgt(const VRegister& vd,
3586 const VRegister& vn,
3587 double imm);
3588
3589 // FP greater than or equal to zero.
3590 void fcmge(const VRegister& vd,
3591 const VRegister& vn,
3592 double imm);
3593
3594 // FP less than or equal to zero.
3595 void fcmle(const VRegister& vd,
3596 const VRegister& vn,
3597 double imm);
3598
// FP less than zero.
3600 void fcmlt(const VRegister& vd,
3601 const VRegister& vn,
3602 double imm);
3603
3604 // FP absolute difference.
3605 void fabd(const VRegister& vd,
3606 const VRegister& vn,
3607 const VRegister& vm);
3608
3609 // FP pairwise add vector.
3610 void faddp(const VRegister& vd,
3611 const VRegister& vn,
3612 const VRegister& vm);
3613
3614 // FP pairwise add scalar.
3615 void faddp(const VRegister& vd,
3616 const VRegister& vn);
3617
3618 // FP pairwise maximum vector.
3619 void fmaxp(const VRegister& vd,
3620 const VRegister& vn,
3621 const VRegister& vm);
3622
3623 // FP pairwise maximum scalar.
3624 void fmaxp(const VRegister& vd,
3625 const VRegister& vn);
3626
3627 // FP pairwise minimum vector.
3628 void fminp(const VRegister& vd,
3629 const VRegister& vn,
3630 const VRegister& vm);
3631
3632 // FP pairwise minimum scalar.
3633 void fminp(const VRegister& vd,
3634 const VRegister& vn);
3635
3636 // FP pairwise maximum number vector.
3637 void fmaxnmp(const VRegister& vd,
3638 const VRegister& vn,
3639 const VRegister& vm);
3640
3641 // FP pairwise maximum number scalar.
3642 void fmaxnmp(const VRegister& vd,
3643 const VRegister& vn);
3644
3645 // FP pairwise minimum number vector.
3646 void fminnmp(const VRegister& vd,
3647 const VRegister& vn,
3648 const VRegister& vm);
3649
3650 // FP pairwise minimum number scalar.
3651 void fminnmp(const VRegister& vd,
3652 const VRegister& vn);
3653
3654 // Emit generic instructions.
3655 // Emit raw instructions into the instruction stream.
dci(Instr raw_inst)3656 void dci(Instr raw_inst) { Emit(raw_inst); }
3657
3658 // Emit 32 bits of data into the instruction stream.
dc32(uint32_t data)3659 void dc32(uint32_t data) {
3660 VIXL_ASSERT(buffer_monitor_ > 0);
3661 buffer_->Emit32(data);
3662 }
3663
3664 // Emit 64 bits of data into the instruction stream.
dc64(uint64_t data)3665 void dc64(uint64_t data) {
3666 VIXL_ASSERT(buffer_monitor_ > 0);
3667 buffer_->Emit64(data);
3668 }
3669
3670 // Copy a string into the instruction stream, including the terminating NULL
3671 // character. The instruction pointer is then aligned correctly for
3672 // subsequent instructions.
EmitString(const char * string)3673 void EmitString(const char * string) {
3674 VIXL_ASSERT(string != NULL);
3675 VIXL_ASSERT(buffer_monitor_ > 0);
3676
3677 buffer_->EmitString(string);
3678 buffer_->Align();
3679 }
3680
3681 // Code generation helpers.
3682
3683 // Register encoding.
Rd(CPURegister rd)3684 static Instr Rd(CPURegister rd) {
3685 VIXL_ASSERT(rd.code() != kSPRegInternalCode);
3686 return rd.code() << Rd_offset;
3687 }
3688
Rn(CPURegister rn)3689 static Instr Rn(CPURegister rn) {
3690 VIXL_ASSERT(rn.code() != kSPRegInternalCode);
3691 return rn.code() << Rn_offset;
3692 }
3693
Rm(CPURegister rm)3694 static Instr Rm(CPURegister rm) {
3695 VIXL_ASSERT(rm.code() != kSPRegInternalCode);
3696 return rm.code() << Rm_offset;
3697 }
3698
RmNot31(CPURegister rm)3699 static Instr RmNot31(CPURegister rm) {
3700 VIXL_ASSERT(rm.code() != kSPRegInternalCode);
3701 VIXL_ASSERT(!rm.IsZero());
3702 return Rm(rm);
3703 }
3704
Ra(CPURegister ra)3705 static Instr Ra(CPURegister ra) {
3706 VIXL_ASSERT(ra.code() != kSPRegInternalCode);
3707 return ra.code() << Ra_offset;
3708 }
3709
Rt(CPURegister rt)3710 static Instr Rt(CPURegister rt) {
3711 VIXL_ASSERT(rt.code() != kSPRegInternalCode);
3712 return rt.code() << Rt_offset;
3713 }
3714
Rt2(CPURegister rt2)3715 static Instr Rt2(CPURegister rt2) {
3716 VIXL_ASSERT(rt2.code() != kSPRegInternalCode);
3717 return rt2.code() << Rt2_offset;
3718 }
3719
Rs(CPURegister rs)3720 static Instr Rs(CPURegister rs) {
3721 VIXL_ASSERT(rs.code() != kSPRegInternalCode);
3722 return rs.code() << Rs_offset;
3723 }
3724
3725 // These encoding functions allow the stack pointer to be encoded, and
3726 // disallow the zero register.
RdSP(Register rd)3727 static Instr RdSP(Register rd) {
3728 VIXL_ASSERT(!rd.IsZero());
3729 return (rd.code() & kRegCodeMask) << Rd_offset;
3730 }
3731
RnSP(Register rn)3732 static Instr RnSP(Register rn) {
3733 VIXL_ASSERT(!rn.IsZero());
3734 return (rn.code() & kRegCodeMask) << Rn_offset;
3735 }
3736
3737 // Flags encoding.
Flags(FlagsUpdate S)3738 static Instr Flags(FlagsUpdate S) {
3739 if (S == SetFlags) {
3740 return 1 << FlagsUpdate_offset;
3741 } else if (S == LeaveFlags) {
3742 return 0 << FlagsUpdate_offset;
3743 }
3744 VIXL_UNREACHABLE();
3745 return 0;
3746 }
3747
Cond(Condition cond)3748 static Instr Cond(Condition cond) {
3749 return cond << Condition_offset;
3750 }
3751
3752 // PC-relative address encoding.
ImmPCRelAddress(int imm21)3753 static Instr ImmPCRelAddress(int imm21) {
3754 VIXL_ASSERT(is_int21(imm21));
3755 Instr imm = static_cast<Instr>(truncate_to_int21(imm21));
3756 Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
3757 Instr immlo = imm << ImmPCRelLo_offset;
3758 return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask);
3759 }
3760
3761 // Branch encoding.
ImmUncondBranch(int imm26)3762 static Instr ImmUncondBranch(int imm26) {
3763 VIXL_ASSERT(is_int26(imm26));
3764 return truncate_to_int26(imm26) << ImmUncondBranch_offset;
3765 }
3766
ImmCondBranch(int imm19)3767 static Instr ImmCondBranch(int imm19) {
3768 VIXL_ASSERT(is_int19(imm19));
3769 return truncate_to_int19(imm19) << ImmCondBranch_offset;
3770 }
3771
ImmCmpBranch(int imm19)3772 static Instr ImmCmpBranch(int imm19) {
3773 VIXL_ASSERT(is_int19(imm19));
3774 return truncate_to_int19(imm19) << ImmCmpBranch_offset;
3775 }
3776
ImmTestBranch(int imm14)3777 static Instr ImmTestBranch(int imm14) {
3778 VIXL_ASSERT(is_int14(imm14));
3779 return truncate_to_int14(imm14) << ImmTestBranch_offset;
3780 }
3781
ImmTestBranchBit(unsigned bit_pos)3782 static Instr ImmTestBranchBit(unsigned bit_pos) {
3783 VIXL_ASSERT(is_uint6(bit_pos));
3784 // Subtract five from the shift offset, as we need bit 5 from bit_pos.
3785 unsigned b5 = bit_pos << (ImmTestBranchBit5_offset - 5);
3786 unsigned b40 = bit_pos << ImmTestBranchBit40_offset;
3787 b5 &= ImmTestBranchBit5_mask;
3788 b40 &= ImmTestBranchBit40_mask;
3789 return b5 | b40;
3790 }
3791
3792 // Data Processing encoding.
SF(Register rd)3793 static Instr SF(Register rd) {
3794 return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits;
3795 }
3796
ImmAddSub(int64_t imm)3797 static Instr ImmAddSub(int64_t imm) {
3798 VIXL_ASSERT(IsImmAddSub(imm));
3799 if (is_uint12(imm)) { // No shift required.
3800 return imm << ImmAddSub_offset;
3801 } else {
3802 return ((imm >> 12) << ImmAddSub_offset) | (1 << ShiftAddSub_offset);
3803 }
3804 }
3805
ImmS(unsigned imms,unsigned reg_size)3806 static Instr ImmS(unsigned imms, unsigned reg_size) {
3807 VIXL_ASSERT(((reg_size == kXRegSize) && is_uint6(imms)) ||
3808 ((reg_size == kWRegSize) && is_uint5(imms)));
3809 USE(reg_size);
3810 return imms << ImmS_offset;
3811 }
3812
ImmR(unsigned immr,unsigned reg_size)3813 static Instr ImmR(unsigned immr, unsigned reg_size) {
3814 VIXL_ASSERT(((reg_size == kXRegSize) && is_uint6(immr)) ||
3815 ((reg_size == kWRegSize) && is_uint5(immr)));
3816 USE(reg_size);
3817 VIXL_ASSERT(is_uint6(immr));
3818 return immr << ImmR_offset;
3819 }
3820
ImmSetBits(unsigned imms,unsigned reg_size)3821 static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
3822 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
3823 VIXL_ASSERT(is_uint6(imms));
3824 VIXL_ASSERT((reg_size == kXRegSize) || is_uint6(imms + 3));
3825 USE(reg_size);
3826 return imms << ImmSetBits_offset;
3827 }
3828
ImmRotate(unsigned immr,unsigned reg_size)3829 static Instr ImmRotate(unsigned immr, unsigned reg_size) {
3830 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
3831 VIXL_ASSERT(((reg_size == kXRegSize) && is_uint6(immr)) ||
3832 ((reg_size == kWRegSize) && is_uint5(immr)));
3833 USE(reg_size);
3834 return immr << ImmRotate_offset;
3835 }
3836
ImmLLiteral(int imm19)3837 static Instr ImmLLiteral(int imm19) {
3838 VIXL_ASSERT(is_int19(imm19));
3839 return truncate_to_int19(imm19) << ImmLLiteral_offset;
3840 }
3841
BitN(unsigned bitn,unsigned reg_size)3842 static Instr BitN(unsigned bitn, unsigned reg_size) {
3843 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
3844 VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
3845 USE(reg_size);
3846 return bitn << BitN_offset;
3847 }
3848
ShiftDP(Shift shift)3849 static Instr ShiftDP(Shift shift) {
3850 VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
3851 return shift << ShiftDP_offset;
3852 }
3853
ImmDPShift(unsigned amount)3854 static Instr ImmDPShift(unsigned amount) {
3855 VIXL_ASSERT(is_uint6(amount));
3856 return amount << ImmDPShift_offset;
3857 }
3858
ExtendMode(Extend extend)3859 static Instr ExtendMode(Extend extend) {
3860 return extend << ExtendMode_offset;
3861 }
3862
ImmExtendShift(unsigned left_shift)3863 static Instr ImmExtendShift(unsigned left_shift) {
3864 VIXL_ASSERT(left_shift <= 4);
3865 return left_shift << ImmExtendShift_offset;
3866 }
3867
ImmCondCmp(unsigned imm)3868 static Instr ImmCondCmp(unsigned imm) {
3869 VIXL_ASSERT(is_uint5(imm));
3870 return imm << ImmCondCmp_offset;
3871 }
3872
Nzcv(StatusFlags nzcv)3873 static Instr Nzcv(StatusFlags nzcv) {
3874 return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
3875 }
3876
3877 // MemOperand offset encoding.
ImmLSUnsigned(int imm12)3878 static Instr ImmLSUnsigned(int imm12) {
3879 VIXL_ASSERT(is_uint12(imm12));
3880 return imm12 << ImmLSUnsigned_offset;
3881 }
3882
ImmLS(int imm9)3883 static Instr ImmLS(int imm9) {
3884 VIXL_ASSERT(is_int9(imm9));
3885 return truncate_to_int9(imm9) << ImmLS_offset;
3886 }
3887
ImmLSPair(int imm7,unsigned access_size)3888 static Instr ImmLSPair(int imm7, unsigned access_size) {
3889 VIXL_ASSERT(((imm7 >> access_size) << access_size) == imm7);
3890 int scaled_imm7 = imm7 >> access_size;
3891 VIXL_ASSERT(is_int7(scaled_imm7));
3892 return truncate_to_int7(scaled_imm7) << ImmLSPair_offset;
3893 }
3894
ImmShiftLS(unsigned shift_amount)3895 static Instr ImmShiftLS(unsigned shift_amount) {
3896 VIXL_ASSERT(is_uint1(shift_amount));
3897 return shift_amount << ImmShiftLS_offset;
3898 }
3899
ImmPrefetchOperation(int imm5)3900 static Instr ImmPrefetchOperation(int imm5) {
3901 VIXL_ASSERT(is_uint5(imm5));
3902 return imm5 << ImmPrefetchOperation_offset;
3903 }
3904
ImmException(int imm16)3905 static Instr ImmException(int imm16) {
3906 VIXL_ASSERT(is_uint16(imm16));
3907 return imm16 << ImmException_offset;
3908 }
3909
ImmSystemRegister(int imm15)3910 static Instr ImmSystemRegister(int imm15) {
3911 VIXL_ASSERT(is_uint15(imm15));
3912 return imm15 << ImmSystemRegister_offset;
3913 }
3914
ImmHint(int imm7)3915 static Instr ImmHint(int imm7) {
3916 VIXL_ASSERT(is_uint7(imm7));
3917 return imm7 << ImmHint_offset;
3918 }
3919
CRm(int imm4)3920 static Instr CRm(int imm4) {
3921 VIXL_ASSERT(is_uint4(imm4));
3922 return imm4 << CRm_offset;
3923 }
3924
CRn(int imm4)3925 static Instr CRn(int imm4) {
3926 VIXL_ASSERT(is_uint4(imm4));
3927 return imm4 << CRn_offset;
3928 }
3929
SysOp(int imm14)3930 static Instr SysOp(int imm14) {
3931 VIXL_ASSERT(is_uint14(imm14));
3932 return imm14 << SysOp_offset;
3933 }
3934
ImmSysOp1(int imm3)3935 static Instr ImmSysOp1(int imm3) {
3936 VIXL_ASSERT(is_uint3(imm3));
3937 return imm3 << SysOp1_offset;
3938 }
3939
ImmSysOp2(int imm3)3940 static Instr ImmSysOp2(int imm3) {
3941 VIXL_ASSERT(is_uint3(imm3));
3942 return imm3 << SysOp2_offset;
3943 }
3944
ImmBarrierDomain(int imm2)3945 static Instr ImmBarrierDomain(int imm2) {
3946 VIXL_ASSERT(is_uint2(imm2));
3947 return imm2 << ImmBarrierDomain_offset;
3948 }
3949
ImmBarrierType(int imm2)3950 static Instr ImmBarrierType(int imm2) {
3951 VIXL_ASSERT(is_uint2(imm2));
3952 return imm2 << ImmBarrierType_offset;
3953 }
3954
3955 // Move immediates encoding.
ImmMoveWide(uint64_t imm)3956 static Instr ImmMoveWide(uint64_t imm) {
3957 VIXL_ASSERT(is_uint16(imm));
3958 return imm << ImmMoveWide_offset;
3959 }
3960
ShiftMoveWide(int64_t shift)3961 static Instr ShiftMoveWide(int64_t shift) {
3962 VIXL_ASSERT(is_uint2(shift));
3963 return shift << ShiftMoveWide_offset;
3964 }
3965
3966 // FP Immediates.
3967 static Instr ImmFP32(float imm);
3968 static Instr ImmFP64(double imm);
3969
3970 // FP register type.
FPType(FPRegister fd)3971 static Instr FPType(FPRegister fd) {
3972 return fd.Is64Bits() ? FP64 : FP32;
3973 }
3974
FPScale(unsigned scale)3975 static Instr FPScale(unsigned scale) {
3976 VIXL_ASSERT(is_uint6(scale));
3977 return scale << FPScale_offset;
3978 }
3979
3980 // Immediate field checking helpers.
3981 static bool IsImmAddSub(int64_t immediate);
3982 static bool IsImmConditionalCompare(int64_t immediate);
3983 static bool IsImmFP32(float imm);
3984 static bool IsImmFP64(double imm);
3985 static bool IsImmLogical(uint64_t value,
3986 unsigned width,
3987 unsigned* n = NULL,
3988 unsigned* imm_s = NULL,
3989 unsigned* imm_r = NULL);
3990 static bool IsImmLSPair(int64_t offset, unsigned access_size);
3991 static bool IsImmLSScaled(int64_t offset, unsigned access_size);
3992 static bool IsImmLSUnscaled(int64_t offset);
3993 static bool IsImmMovn(uint64_t imm, unsigned reg_size);
3994 static bool IsImmMovz(uint64_t imm, unsigned reg_size);
3995
3996 // Instruction bits for vector format in data processing operations.
VFormat(VRegister vd)3997 static Instr VFormat(VRegister vd) {
3998 if (vd.Is64Bits()) {
3999 switch (vd.lanes()) {
4000 case 2: return NEON_2S;
4001 case 4: return NEON_4H;
4002 case 8: return NEON_8B;
4003 default: return 0xffffffff;
4004 }
4005 } else {
4006 VIXL_ASSERT(vd.Is128Bits());
4007 switch (vd.lanes()) {
4008 case 2: return NEON_2D;
4009 case 4: return NEON_4S;
4010 case 8: return NEON_8H;
4011 case 16: return NEON_16B;
4012 default: return 0xffffffff;
4013 }
4014 }
4015 }
4016
4017 // Instruction bits for vector format in floating point data processing
4018 // operations.
FPFormat(VRegister vd)4019 static Instr FPFormat(VRegister vd) {
4020 if (vd.lanes() == 1) {
4021 // Floating point scalar formats.
4022 VIXL_ASSERT(vd.Is32Bits() || vd.Is64Bits());
4023 return vd.Is64Bits() ? FP64 : FP32;
4024 }
4025
4026 // Two lane floating point vector formats.
4027 if (vd.lanes() == 2) {
4028 VIXL_ASSERT(vd.Is64Bits() || vd.Is128Bits());
4029 return vd.Is128Bits() ? NEON_FP_2D : NEON_FP_2S;
4030 }
4031
4032 // Four lane floating point vector format.
4033 VIXL_ASSERT((vd.lanes() == 4) && vd.Is128Bits());
4034 return NEON_FP_4S;
4035 }
4036
4037 // Instruction bits for vector format in load and store operations.
LSVFormat(VRegister vd)4038 static Instr LSVFormat(VRegister vd) {
4039 if (vd.Is64Bits()) {
4040 switch (vd.lanes()) {
4041 case 1: return LS_NEON_1D;
4042 case 2: return LS_NEON_2S;
4043 case 4: return LS_NEON_4H;
4044 case 8: return LS_NEON_8B;
4045 default: return 0xffffffff;
4046 }
4047 } else {
4048 VIXL_ASSERT(vd.Is128Bits());
4049 switch (vd.lanes()) {
4050 case 2: return LS_NEON_2D;
4051 case 4: return LS_NEON_4S;
4052 case 8: return LS_NEON_8H;
4053 case 16: return LS_NEON_16B;
4054 default: return 0xffffffff;
4055 }
4056 }
4057 }
4058
4059 // Instruction bits for scalar format in data processing operations.
SFormat(VRegister vd)4060 static Instr SFormat(VRegister vd) {
4061 VIXL_ASSERT(vd.lanes() == 1);
4062 switch (vd.SizeInBytes()) {
4063 case 1: return NEON_B;
4064 case 2: return NEON_H;
4065 case 4: return NEON_S;
4066 case 8: return NEON_D;
4067 default: return 0xffffffff;
4068 }
4069 }
4070
ImmNEONHLM(int index,int num_bits)4071 static Instr ImmNEONHLM(int index, int num_bits) {
4072 int h, l, m;
4073 if (num_bits == 3) {
4074 VIXL_ASSERT(is_uint3(index));
4075 h = (index >> 2) & 1;
4076 l = (index >> 1) & 1;
4077 m = (index >> 0) & 1;
4078 } else if (num_bits == 2) {
4079 VIXL_ASSERT(is_uint2(index));
4080 h = (index >> 1) & 1;
4081 l = (index >> 0) & 1;
4082 m = 0;
4083 } else {
4084 VIXL_ASSERT(is_uint1(index) && (num_bits == 1));
4085 h = (index >> 0) & 1;
4086 l = 0;
4087 m = 0;
4088 }
4089 return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
4090 }
4091
ImmNEONExt(int imm4)4092 static Instr ImmNEONExt(int imm4) {
4093 VIXL_ASSERT(is_uint4(imm4));
4094 return imm4 << ImmNEONExt_offset;
4095 }
4096
ImmNEON5(Instr format,int index)4097 static Instr ImmNEON5(Instr format, int index) {
4098 VIXL_ASSERT(is_uint4(index));
4099 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
4100 int imm5 = (index << (s + 1)) | (1 << s);
4101 return imm5 << ImmNEON5_offset;
4102 }
4103
ImmNEON4(Instr format,int index)4104 static Instr ImmNEON4(Instr format, int index) {
4105 VIXL_ASSERT(is_uint4(index));
4106 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
4107 int imm4 = index << s;
4108 return imm4 << ImmNEON4_offset;
4109 }
4110
ImmNEONabcdefgh(int imm8)4111 static Instr ImmNEONabcdefgh(int imm8) {
4112 VIXL_ASSERT(is_uint8(imm8));
4113 Instr instr;
4114 instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
4115 instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
4116 return instr;
4117 }
4118
NEONCmode(int cmode)4119 static Instr NEONCmode(int cmode) {
4120 VIXL_ASSERT(is_uint4(cmode));
4121 return cmode << NEONCmode_offset;
4122 }
4123
NEONModImmOp(int op)4124 static Instr NEONModImmOp(int op) {
4125 VIXL_ASSERT(is_uint1(op));
4126 return op << NEONModImmOp_offset;
4127 }
4128
4129 // Size of the code generated since label to the current position.
SizeOfCodeGeneratedSince(Label * label)4130 size_t SizeOfCodeGeneratedSince(Label* label) const {
4131 VIXL_ASSERT(label->IsBound());
4132 return buffer_->OffsetFrom(label->location());
4133 }
4134
SizeOfCodeGenerated()4135 size_t SizeOfCodeGenerated() const {
4136 return buffer_->CursorOffset();
4137 }
4138
BufferCapacity()4139 size_t BufferCapacity() const { return buffer_->capacity(); }
4140
RemainingBufferSpace()4141 size_t RemainingBufferSpace() const { return buffer_->RemainingBytes(); }
4142
EnsureSpaceFor(size_t amount)4143 void EnsureSpaceFor(size_t amount) {
4144 if (buffer_->RemainingBytes() < amount) {
4145 size_t capacity = buffer_->capacity();
4146 size_t size = buffer_->CursorOffset();
4147 do {
4148 // TODO(all): refine.
4149 capacity *= 2;
4150 } while ((capacity - size) < amount);
4151 buffer_->Grow(capacity);
4152 }
4153 }
4154
4155 #ifdef VIXL_DEBUG
AcquireBuffer()4156 void AcquireBuffer() {
4157 VIXL_ASSERT(buffer_monitor_ >= 0);
4158 buffer_monitor_++;
4159 }
4160
ReleaseBuffer()4161 void ReleaseBuffer() {
4162 buffer_monitor_--;
4163 VIXL_ASSERT(buffer_monitor_ >= 0);
4164 }
4165 #endif
4166
pic()4167 PositionIndependentCodeOption pic() const {
4168 return pic_;
4169 }
4170
AllowPageOffsetDependentCode()4171 bool AllowPageOffsetDependentCode() const {
4172 return (pic() == PageOffsetDependentCode) ||
4173 (pic() == PositionDependentCode);
4174 }
4175
AppropriateZeroRegFor(const CPURegister & reg)4176 static const Register& AppropriateZeroRegFor(const CPURegister& reg) {
4177 return reg.Is64Bits() ? xzr : wzr;
4178 }
4179
4180
4181 protected:
// Protected load/store helpers. Only the declarations are visible in this
// part of the header; the notes below describe the signatures, not the
// implementations.

// Single-register load/store: transfer register, memory operand and opcode.
// `option` defaults to PreferScaledOffset.
4182 void LoadStore(const CPURegister& rt,
4183 const MemOperand& addr,
4184 LoadStoreOp op,
4185 LoadStoreScalingOption option = PreferScaledOffset);
4186
// Register-pair variant: two transfer registers, memory operand and opcode.
4187 void LoadStorePair(const CPURegister& rt,
4188 const CPURegister& rt2,
4189 const MemOperand& addr,
4190 LoadStorePairOp op);
// NEON structure load/store helpers, all operating on a VRegister and a
// memory operand. LoadStoreStruct1 additionally takes a register count and
// LoadStoreStructSingle a lane number.
4191 void LoadStoreStruct(const VRegister& vt,
4192 const MemOperand& addr,
4193 NEONLoadStoreMultiStructOp op);
4194 void LoadStoreStruct1(const VRegister& vt,
4195 int reg_count,
4196 const MemOperand& addr);
4197 void LoadStoreStructSingle(const VRegister& vt,
4198 uint32_t lane,
4199 const MemOperand& addr,
4200 NEONLoadStoreSingleStructOp op);
4201 void LoadStoreStructSingleAllLanes(const VRegister& vt,
4202 const MemOperand& addr,
4203 NEONLoadStoreSingleStructOp op);
// NOTE(review): presumably validates the register/operand combination for
// the given opcode -- confirm against the definition.
4204 void LoadStoreStructVerify(const VRegister& vt,
4205 const MemOperand& addr,
4206 Instr op);
4207
// Prefetch with the given operation and memory operand. `option` defaults to
// PreferScaledOffset.
4208 void Prefetch(PrefetchOperation op,
4209 const MemOperand& addr,
4210 LoadStoreScalingOption option = PreferScaledOffset);
4211
4212 // TODO(all): The third parameter should be passed by reference but gcc 4.8.2
4213 // reports a bogus uninitialised warning then.
// Logical operation on `rn` and `operand` with destination `rd`. Note that
// `operand` is deliberately taken by value (see the TODO above).
4214 void Logical(const Register& rd,
4215 const Register& rn,
4216 const Operand operand,
4217 LogicalOp op);
// Logical operation taking three separate unsigned fields (`n`, `imm_s`,
// `imm_r`) instead of an Operand.
4218 void LogicalImmediate(const Register& rd,
4219 const Register& rn,
4220 unsigned n,
4221 unsigned imm_s,
4222 unsigned imm_r,
4223 LogicalOp op);
4224
// Conditional compare of `rn` against `operand`, taking a StatusFlags value
// and a condition. Declaration only.
4225 void ConditionalCompare(const Register& rn,
4226 const Operand& operand,
4227 StatusFlags nzcv,
4228 Condition cond,
4229 ConditionalCompareOp op);
4230
// Add/subtract with carry; `S` is a FlagsUpdate value. Declaration only.
4231 void AddSubWithCarry(const Register& rd,
4232 const Register& rn,
4233 const Operand& operand,
4234 FlagsUpdate S,
4235 AddSubWithCarryOp op);
4236
4237
4238 // Functions for emulating operands not directly supported by the instruction
4239 // set.
// EmitShift takes a shift kind and an amount; EmitExtendShift takes an extend
// kind and a left shift. Both have a destination and a source register.
4240 void EmitShift(const Register& rd,
4241 const Register& rn,
4242 Shift shift,
4243 unsigned amount);
4244 void EmitExtendShift(const Register& rd,
4245 const Register& rn,
4246 Extend extend,
4247 unsigned left_shift);
4248
// Add/subtract of `rn` and `operand` into `rd`; `S` is a FlagsUpdate value.
// Declaration only.
4249 void AddSub(const Register& rd,
4250 const Register& rn,
4251 const Operand& operand,
4252 FlagsUpdate S,
4253 AddSubOp op);
4254
// NEON table operation on three vector registers. Declaration only.
4255 void NEONTable(const VRegister& vd,
4256 const VRegister& vn,
4257 const VRegister& vm,
4258 NEONTableOp op);
4259
4260 // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
4261 // registers. Only simple loads are supported; sign- and zero-extension (such
4262 // as in LDPSW_x or LDRB_w) are not supported.
// All selectors are static and take the register(s) involved; the pair
// variants take both registers of the pair.
4263 static LoadStoreOp LoadOpFor(const CPURegister& rt);
4264 static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
4265 const CPURegister& rt2);
4266 static LoadStoreOp StoreOpFor(const CPURegister& rt);
4267 static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
4268 const CPURegister& rt2);
// Non-temporal pair variants return a LoadStorePairNonTemporalOp.
4269 static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
4270 const CPURegister& rt, const CPURegister& rt2);
4271 static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
4272 const CPURegister& rt, const CPURegister& rt2);
// Literal-load variant returns a LoadLiteralOp for a single register.
4273 static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);
4274
4275
4276 private:
// Map a float / double to a uint32_t value.
// NOTE(review): the names suggest an 8-bit immediate encoding -- confirm
// against the definitions, which are not visible here.
4277 static uint32_t FP32ToImm8(float imm);
4278 static uint32_t FP64ToImm8(double imm);
4279
4280 // Instruction helpers.
// Private helpers for general-purpose and scalar FP instructions. Only the
// declarations are visible in this part of the header.

// Move-wide helper: destination, 64-bit immediate, shift and opcode.
4281 void MoveWide(const Register& rd,
4282 uint64_t imm,
4283 int shift,
4284 MoveWideImmediateOp mov_op);
// Data processing with an Operand; these two take the opcode as raw Instr
// bits and a FlagsUpdate value `S`.
4285 void DataProcShiftedRegister(const Register& rd,
4286 const Register& rn,
4287 const Operand& operand,
4288 FlagsUpdate S,
4289 Instr op);
4290 void DataProcExtendedRegister(const Register& rd,
4291 const Register& rn,
4292 const Operand& operand,
4293 FlagsUpdate S,
4294 Instr op);
// Non-temporal register-pair load/store.
4295 void LoadStorePairNonTemporal(const CPURegister& rt,
4296 const CPURegister& rt2,
4297 const MemOperand& addr,
4298 LoadStorePairNonTemporalOp op);
// Literal load taking the value as a uint64_t.
4299 void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
// Conditional select between `rn` and `rm` under `cond`.
4300 void ConditionalSelect(const Register& rd,
4301 const Register& rn,
4302 const Register& rm,
4303 Condition cond,
4304 ConditionalSelectOp op);
// Integer data processing with one and three source registers.
4305 void DataProcessing1Source(const Register& rd,
4306 const Register& rn,
4307 DataProcessing1SourceOp op);
4308 void DataProcessing3Source(const Register& rd,
4309 const Register& rn,
4310 const Register& rm,
4311 const Register& ra,
4312 DataProcessing3SourceOp op);
// Floating point data processing with one and three source registers.
4313 void FPDataProcessing1Source(const VRegister& fd,
4314 const VRegister& fn,
4315 FPDataProcessing1SourceOp op);
4316 void FPDataProcessing3Source(const VRegister& fd,
4317 const VRegister& fn,
4318 const VRegister& fm,
4319 const VRegister& fa,
4320 FPDataProcessing3SourceOp op);
// Private NEON helpers. Only the declarations are visible in this part of
// the header; the notes below describe the signatures.

// Across-lanes operations: one destination and one source vector register.
4321 void NEONAcrossLanesL(const VRegister& vd,
4322 const VRegister& vn,
4323 NEONAcrossLanesOp op);
4324 void NEONAcrossLanes(const VRegister& vd,
4325 const VRegister& vn,
4326 NEONAcrossLanesOp op);
// Modified-immediate operations: an `imm8` value plus a shift amount.
4327 void NEONModifiedImmShiftLsl(const VRegister& vd,
4328 const int imm8,
4329 const int left_shift,
4330 NEONModifiedImmediateOp op);
4331 void NEONModifiedImmShiftMsl(const VRegister& vd,
4332 const int imm8,
4333 const int shift_amount,
4334 NEONModifiedImmediateOp op);
// Two-register FP operation taking the opcode as raw Instr bits.
4335 void NEONFP2Same(const VRegister& vd,
4336 const VRegister& vn,
4337 Instr vop);
// Three-register operations; the FP variant takes raw Instr bits.
4338 void NEON3Same(const VRegister& vd,
4339 const VRegister& vn,
4340 const VRegister& vm,
4341 NEON3SameOp vop);
4342 void NEONFP3Same(const VRegister& vd,
4343 const VRegister& vn,
4344 const VRegister& vm,
4345 Instr op);
// "Three different" operations, all sharing the NEON3DifferentOp opcode type.
4346 void NEON3DifferentL(const VRegister& vd,
4347 const VRegister& vn,
4348 const VRegister& vm,
4349 NEON3DifferentOp vop);
4350 void NEON3DifferentW(const VRegister& vd,
4351 const VRegister& vn,
4352 const VRegister& vm,
4353 NEON3DifferentOp vop);
4354 void NEON3DifferentHN(const VRegister& vd,
4355 const VRegister& vn,
4356 const VRegister& vm,
4357 NEON3DifferentOp vop);
// Two-register miscellaneous operations. NEONFP2RegMisc is overloaded: this
// overload takes a NEON2RegMiscOp and a double `value` defaulting to 0.0; the
// overload further down takes raw Instr bits. NEON2RegMisc takes an int
// `value` defaulting to 0.
4358 void NEONFP2RegMisc(const VRegister& vd,
4359 const VRegister& vn,
4360 NEON2RegMiscOp vop,
4361 double value = 0.0);
4362 void NEON2RegMisc(const VRegister& vd,
4363 const VRegister& vn,
4364 NEON2RegMiscOp vop,
4365 int value = 0);
4366 void NEONFP2RegMisc(const VRegister& vd,
4367 const VRegister& vn,
4368 Instr op);
4369 void NEONAddlp(const VRegister& vd,
4370 const VRegister& vn,
4371 NEON2RegMiscOp op);
// Permute operation on three vector registers.
4372 void NEONPerm(const VRegister& vd,
4373 const VRegister& vn,
4374 const VRegister& vm,
4375 NEONPermOp op);
// By-element operations: `vm_index` is passed alongside `vm`.
// NOTE(review): presumably the lane of `vm` to use -- confirm against the
// definitions.
4376 void NEONFPByElement(const VRegister& vd,
4377 const VRegister& vn,
4378 const VRegister& vm,
4379 int vm_index,
4380 NEONByIndexedElementOp op);
4381 void NEONByElement(const VRegister& vd,
4382 const VRegister& vn,
4383 const VRegister& vm,
4384 int vm_index,
4385 NEONByIndexedElementOp op);
4386 void NEONByElementL(const VRegister& vd,
4387 const VRegister& vn,
4388 const VRegister& vm,
4389 int vm_index,
4390 NEONByIndexedElementOp op);
// Shift-by-immediate operations. NEONShiftImmediate takes a pre-combined
// `immh_immb` value after the opcode; the other variants take a `shift`
// amount before the opcode.
4391 void NEONShiftImmediate(const VRegister& vd,
4392 const VRegister& vn,
4393 NEONShiftImmediateOp op,
4394 int immh_immb);
4395 void NEONShiftLeftImmediate(const VRegister& vd,
4396 const VRegister& vn,
4397 int shift,
4398 NEONShiftImmediateOp op);
4399 void NEONShiftRightImmediate(const VRegister& vd,
4400 const VRegister& vn,
4401 int shift,
4402 NEONShiftImmediateOp op);
4403 void NEONShiftImmediateL(const VRegister& vd,
4404 const VRegister& vn,
4405 int shift,
4406 NEONShiftImmediateOp op);
4407 void NEONShiftImmediateN(const VRegister& vd,
4408 const VRegister& vn,
4409 int shift,
4410 NEONShiftImmediateOp op);
// Two-register operation using a NEON2RegMiscOp opcode.
4411 void NEONXtn(const VRegister& vd,
4412 const VRegister& vn,
4413 NEON2RegMiscOp vop);
4414
// Return instruction bits derived from a memory operand.
// NOTE(review): presumably the addressing-mode field of structure load/store
// instructions -- confirm against the definition.
4415 Instr LoadStoreStructAddrModeField(const MemOperand& addr);
4416
4417 // Encode the specified MemOperand for the specified access size and scaling
4418 // preference.
4419 Instr LoadStoreMemOperand(const MemOperand& addr,
4420 unsigned access_size,
4421 LoadStoreScalingOption option);
4422
4423 // Link the current (not-yet-emitted) instruction to the specified label, then
4424 // return an offset to be encoded in the instruction. If the label is not yet
4425 // bound, an offset of 0 is returned.
// The three variants differ in the unit named in the function: byte,
// instruction and page.
4426 ptrdiff_t LinkAndGetByteOffsetTo(Label * label);
4427 ptrdiff_t LinkAndGetInstructionOffsetTo(Label * label);
4428 ptrdiff_t LinkAndGetPageOffsetTo(Label * label);
4429
4430 // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
// NOTE(review): `element_shift` presumably selects the unit as a power-of-two
// shift -- confirm against the definition.
4431 template <int element_shift>
4432 ptrdiff_t LinkAndGetOffsetTo(Label* label);
4433
4434 // Literal load offsets are in words (32-bit).
4435 ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);
4436
4437 // Emit the instruction in buffer_.
Emit(Instr instruction)4438 void Emit(Instr instruction) {
4439 VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
4440 VIXL_ASSERT(buffer_monitor_ > 0);
4441 buffer_->Emit32(instruction);
4442 }
4443
4444 // Buffer where the code is emitted.
// Target of Emit() and of the size/capacity queries in this class.
4445 CodeBuffer* buffer_;
// Position-independent-code option returned by pic().
4446 PositionIndependentCodeOption pic_;
4447
4448 #ifdef VIXL_DEBUG
// Debug-only counter: incremented by AcquireBuffer(), decremented by
// ReleaseBuffer(); Emit() asserts that it is positive.
4449 int64_t buffer_monitor_;
4450 #endif
4451 };
4452
4453
4454 // All Assembler emits MUST acquire/release the underlying code buffer. The
4455 // helper scope below will do so and optionally ensure the buffer is big enough
4456 // to receive the emit. It is possible to request the scope not to perform any
4457 // checks (kNoCheck) if for example it is known in advance the buffer size is
4458 // adequate or there is some other size checking mechanism in place.
4459 class CodeBufferCheckScope {
4460 public:
4461 // Tell whether or not the scope needs to ensure the associated CodeBuffer
4462 // has enough space for the requested size.
4463 enum CheckPolicy {
4464 kNoCheck,
4465 kCheck
4466 };
4467
4468 // Tell whether or not the scope should assert the amount of code emitted
4469 // within the scope is consistent with the requested amount.
4470 enum AssertPolicy {
4471 kNoAssert, // No assert required.
4472 kExactSize, // The code emitted must be exactly size bytes.
4473 kMaximumSize // The code emitted must be at most size bytes.
4474 };
4475
4476 CodeBufferCheckScope(Assembler* assm,
4477 size_t size,
4478 CheckPolicy check_policy = kCheck,
4479 AssertPolicy assert_policy = kMaximumSize)
assm_(assm)4480 : assm_(assm) {
4481 if (check_policy == kCheck) assm->EnsureSpaceFor(size);
4482 #ifdef VIXL_DEBUG
4483 assm->bind(&start_);
4484 size_ = size;
4485 assert_policy_ = assert_policy;
4486 assm->AcquireBuffer();
4487 #else
4488 USE(assert_policy);
4489 #endif
4490 }
4491
4492 // This is a shortcut for CodeBufferCheckScope(assm, 0, kNoCheck, kNoAssert).
CodeBufferCheckScope(Assembler * assm)4493 explicit CodeBufferCheckScope(Assembler* assm) : assm_(assm) {
4494 #ifdef VIXL_DEBUG
4495 size_ = 0;
4496 assert_policy_ = kNoAssert;
4497 assm->AcquireBuffer();
4498 #endif
4499 }
4500
~CodeBufferCheckScope()4501 ~CodeBufferCheckScope() {
4502 #ifdef VIXL_DEBUG
4503 assm_->ReleaseBuffer();
4504 switch (assert_policy_) {
4505 case kNoAssert: break;
4506 case kExactSize:
4507 VIXL_ASSERT(assm_->SizeOfCodeGeneratedSince(&start_) == size_);
4508 break;
4509 case kMaximumSize:
4510 VIXL_ASSERT(assm_->SizeOfCodeGeneratedSince(&start_) <= size_);
4511 break;
4512 default:
4513 VIXL_UNREACHABLE();
4514 }
4515 #endif
4516 }
4517
4518 protected:
4519 Assembler* assm_;
4520 #ifdef VIXL_DEBUG
4521 Label start_;
4522 size_t size_;
4523 AssertPolicy assert_policy_;
4524 #endif
4525 };
4526
4527 } // namespace vixl
4528
4529 #endif // VIXL_A64_ASSEMBLER_A64_H_
4530