1 // Copyright 2015, ARM Limited
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #ifndef VIXL_A64_ASSEMBLER_A64_H_
28 #define VIXL_A64_ASSEMBLER_A64_H_
29
30
31 #include "vixl/globals.h"
32 #include "vixl/invalset.h"
33 #include "vixl/utils.h"
34 #include "vixl/code-buffer.h"
35 #include "vixl/a64/instructions-a64.h"
36
37 namespace vixl {
38
// A RegList is a 64-bit mask in which each bit position stands for one
// register code.
typedef uint64_t RegList;
// Number of bits available in a RegList.
static const int kRegListSizeInBits = 8 * sizeof(RegList);
41
42
43 // Registers.
44
45 // Some CPURegister methods can return Register or VRegister types, so we need
46 // to declare them in advance.
47 class Register;
48 class VRegister;
49
50 class CPURegister {
51 public:
52 enum RegisterType {
53 // The kInvalid value is used to detect uninitialized static instances,
54 // which are always zero-initialized before any constructors are called.
55 kInvalid = 0,
56 kRegister,
57 kVRegister,
58 kFPRegister = kVRegister,
59 kNoRegister
60 };
61
CPURegister()62 CPURegister() : code_(0), size_(0), type_(kNoRegister) {
63 VIXL_ASSERT(!IsValid());
64 VIXL_ASSERT(IsNone());
65 }
66
CPURegister(unsigned code,unsigned size,RegisterType type)67 CPURegister(unsigned code, unsigned size, RegisterType type)
68 : code_(code), size_(size), type_(type) {
69 VIXL_ASSERT(IsValidOrNone());
70 }
71
code()72 unsigned code() const {
73 VIXL_ASSERT(IsValid());
74 return code_;
75 }
76
type()77 RegisterType type() const {
78 VIXL_ASSERT(IsValidOrNone());
79 return type_;
80 }
81
Bit()82 RegList Bit() const {
83 VIXL_ASSERT(code_ < (sizeof(RegList) * 8));
84 return IsValid() ? (static_cast<RegList>(1) << code_) : 0;
85 }
86
size()87 unsigned size() const {
88 VIXL_ASSERT(IsValid());
89 return size_;
90 }
91
SizeInBytes()92 int SizeInBytes() const {
93 VIXL_ASSERT(IsValid());
94 VIXL_ASSERT(size() % 8 == 0);
95 return size_ / 8;
96 }
97
SizeInBits()98 int SizeInBits() const {
99 VIXL_ASSERT(IsValid());
100 return size_;
101 }
102
Is8Bits()103 bool Is8Bits() const {
104 VIXL_ASSERT(IsValid());
105 return size_ == 8;
106 }
107
Is16Bits()108 bool Is16Bits() const {
109 VIXL_ASSERT(IsValid());
110 return size_ == 16;
111 }
112
Is32Bits()113 bool Is32Bits() const {
114 VIXL_ASSERT(IsValid());
115 return size_ == 32;
116 }
117
Is64Bits()118 bool Is64Bits() const {
119 VIXL_ASSERT(IsValid());
120 return size_ == 64;
121 }
122
Is128Bits()123 bool Is128Bits() const {
124 VIXL_ASSERT(IsValid());
125 return size_ == 128;
126 }
127
IsValid()128 bool IsValid() const {
129 if (IsValidRegister() || IsValidVRegister()) {
130 VIXL_ASSERT(!IsNone());
131 return true;
132 } else {
133 // This assert is hit when the register has not been properly initialized.
134 // One cause for this can be an initialisation order fiasco. See
135 // https://isocpp.org/wiki/faq/ctors#static-init-order for some details.
136 VIXL_ASSERT(IsNone());
137 return false;
138 }
139 }
140
IsValidRegister()141 bool IsValidRegister() const {
142 return IsRegister() &&
143 ((size_ == kWRegSize) || (size_ == kXRegSize)) &&
144 ((code_ < kNumberOfRegisters) || (code_ == kSPRegInternalCode));
145 }
146
IsValidVRegister()147 bool IsValidVRegister() const {
148 return IsVRegister() &&
149 ((size_ == kBRegSize) || (size_ == kHRegSize) ||
150 (size_ == kSRegSize) || (size_ == kDRegSize) ||
151 (size_ == kQRegSize)) &&
152 (code_ < kNumberOfVRegisters);
153 }
154
IsValidFPRegister()155 bool IsValidFPRegister() const {
156 return IsFPRegister() && (code_ < kNumberOfVRegisters);
157 }
158
IsNone()159 bool IsNone() const {
160 // kNoRegister types should always have size 0 and code 0.
161 VIXL_ASSERT((type_ != kNoRegister) || (code_ == 0));
162 VIXL_ASSERT((type_ != kNoRegister) || (size_ == 0));
163
164 return type_ == kNoRegister;
165 }
166
Aliases(const CPURegister & other)167 bool Aliases(const CPURegister& other) const {
168 VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone());
169 return (code_ == other.code_) && (type_ == other.type_);
170 }
171
Is(const CPURegister & other)172 bool Is(const CPURegister& other) const {
173 VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone());
174 return Aliases(other) && (size_ == other.size_);
175 }
176
IsZero()177 bool IsZero() const {
178 VIXL_ASSERT(IsValid());
179 return IsRegister() && (code_ == kZeroRegCode);
180 }
181
IsSP()182 bool IsSP() const {
183 VIXL_ASSERT(IsValid());
184 return IsRegister() && (code_ == kSPRegInternalCode);
185 }
186
IsRegister()187 bool IsRegister() const {
188 return type_ == kRegister;
189 }
190
IsVRegister()191 bool IsVRegister() const {
192 return type_ == kVRegister;
193 }
194
IsFPRegister()195 bool IsFPRegister() const {
196 return IsS() || IsD();
197 }
198
IsW()199 bool IsW() const { return IsValidRegister() && Is32Bits(); }
IsX()200 bool IsX() const { return IsValidRegister() && Is64Bits(); }
201
202 // These assertions ensure that the size and type of the register are as
203 // described. They do not consider the number of lanes that make up a vector.
204 // So, for example, Is8B() implies IsD(), and Is1D() implies IsD, but IsD()
205 // does not imply Is1D() or Is8B().
206 // Check the number of lanes, ie. the format of the vector, using methods such
207 // as Is8B(), Is1D(), etc. in the VRegister class.
IsV()208 bool IsV() const { return IsVRegister(); }
IsB()209 bool IsB() const { return IsV() && Is8Bits(); }
IsH()210 bool IsH() const { return IsV() && Is16Bits(); }
IsS()211 bool IsS() const { return IsV() && Is32Bits(); }
IsD()212 bool IsD() const { return IsV() && Is64Bits(); }
IsQ()213 bool IsQ() const { return IsV() && Is128Bits(); }
214
215 const Register& W() const;
216 const Register& X() const;
217 const VRegister& V() const;
218 const VRegister& B() const;
219 const VRegister& H() const;
220 const VRegister& S() const;
221 const VRegister& D() const;
222 const VRegister& Q() const;
223
IsSameSizeAndType(const CPURegister & other)224 bool IsSameSizeAndType(const CPURegister& other) const {
225 return (size_ == other.size_) && (type_ == other.type_);
226 }
227
228 protected:
229 unsigned code_;
230 unsigned size_;
231 RegisterType type_;
232
233 private:
IsValidOrNone()234 bool IsValidOrNone() const {
235 return IsValid() || IsNone();
236 }
237 };
238
239
240 class Register : public CPURegister {
241 public:
Register()242 Register() : CPURegister() {}
Register(const CPURegister & other)243 explicit Register(const CPURegister& other)
244 : CPURegister(other.code(), other.size(), other.type()) {
245 VIXL_ASSERT(IsValidRegister());
246 }
Register(unsigned code,unsigned size)247 Register(unsigned code, unsigned size)
248 : CPURegister(code, size, kRegister) {}
249
IsValid()250 bool IsValid() const {
251 VIXL_ASSERT(IsRegister() || IsNone());
252 return IsValidRegister();
253 }
254
255 static const Register& WRegFromCode(unsigned code);
256 static const Register& XRegFromCode(unsigned code);
257
258 private:
259 static const Register wregisters[];
260 static const Register xregisters[];
261 };
262
263
264 class VRegister : public CPURegister {
265 public:
VRegister()266 VRegister() : CPURegister(), lanes_(1) {}
VRegister(const CPURegister & other)267 explicit VRegister(const CPURegister& other)
268 : CPURegister(other.code(), other.size(), other.type()), lanes_(1) {
269 VIXL_ASSERT(IsValidVRegister());
270 VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
271 }
272 VRegister(unsigned code, unsigned size, unsigned lanes = 1)
CPURegister(code,size,kVRegister)273 : CPURegister(code, size, kVRegister), lanes_(lanes) {
274 VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
275 }
VRegister(unsigned code,VectorFormat format)276 VRegister(unsigned code, VectorFormat format)
277 : CPURegister(code, RegisterSizeInBitsFromFormat(format), kVRegister),
278 lanes_(IsVectorFormat(format) ? LaneCountFromFormat(format) : 1) {
279 VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
280 }
281
IsValid()282 bool IsValid() const {
283 VIXL_ASSERT(IsVRegister() || IsNone());
284 return IsValidVRegister();
285 }
286
287 static const VRegister& BRegFromCode(unsigned code);
288 static const VRegister& HRegFromCode(unsigned code);
289 static const VRegister& SRegFromCode(unsigned code);
290 static const VRegister& DRegFromCode(unsigned code);
291 static const VRegister& QRegFromCode(unsigned code);
292 static const VRegister& VRegFromCode(unsigned code);
293
V8B()294 VRegister V8B() const { return VRegister(code_, kDRegSize, 8); }
V16B()295 VRegister V16B() const { return VRegister(code_, kQRegSize, 16); }
V4H()296 VRegister V4H() const { return VRegister(code_, kDRegSize, 4); }
V8H()297 VRegister V8H() const { return VRegister(code_, kQRegSize, 8); }
V2S()298 VRegister V2S() const { return VRegister(code_, kDRegSize, 2); }
V4S()299 VRegister V4S() const { return VRegister(code_, kQRegSize, 4); }
V2D()300 VRegister V2D() const { return VRegister(code_, kQRegSize, 2); }
V1D()301 VRegister V1D() const { return VRegister(code_, kDRegSize, 1); }
302
Is8B()303 bool Is8B() const { return (Is64Bits() && (lanes_ == 8)); }
Is16B()304 bool Is16B() const { return (Is128Bits() && (lanes_ == 16)); }
Is4H()305 bool Is4H() const { return (Is64Bits() && (lanes_ == 4)); }
Is8H()306 bool Is8H() const { return (Is128Bits() && (lanes_ == 8)); }
Is2S()307 bool Is2S() const { return (Is64Bits() && (lanes_ == 2)); }
Is4S()308 bool Is4S() const { return (Is128Bits() && (lanes_ == 4)); }
Is1D()309 bool Is1D() const { return (Is64Bits() && (lanes_ == 1)); }
Is2D()310 bool Is2D() const { return (Is128Bits() && (lanes_ == 2)); }
311
312 // For consistency, we assert the number of lanes of these scalar registers,
313 // even though there are no vectors of equivalent total size with which they
314 // could alias.
Is1B()315 bool Is1B() const {
316 VIXL_ASSERT(!(Is8Bits() && IsVector()));
317 return Is8Bits();
318 }
Is1H()319 bool Is1H() const {
320 VIXL_ASSERT(!(Is16Bits() && IsVector()));
321 return Is16Bits();
322 }
Is1S()323 bool Is1S() const {
324 VIXL_ASSERT(!(Is32Bits() && IsVector()));
325 return Is32Bits();
326 }
327
IsLaneSizeB()328 bool IsLaneSizeB() const { return LaneSizeInBits() == kBRegSize; }
IsLaneSizeH()329 bool IsLaneSizeH() const { return LaneSizeInBits() == kHRegSize; }
IsLaneSizeS()330 bool IsLaneSizeS() const { return LaneSizeInBits() == kSRegSize; }
IsLaneSizeD()331 bool IsLaneSizeD() const { return LaneSizeInBits() == kDRegSize; }
332
lanes()333 int lanes() const {
334 return lanes_;
335 }
336
IsScalar()337 bool IsScalar() const {
338 return lanes_ == 1;
339 }
340
IsVector()341 bool IsVector() const {
342 return lanes_ > 1;
343 }
344
IsSameFormat(const VRegister & other)345 bool IsSameFormat(const VRegister& other) const {
346 return (size_ == other.size_) && (lanes_ == other.lanes_);
347 }
348
LaneSizeInBytes()349 unsigned LaneSizeInBytes() const {
350 return SizeInBytes() / lanes_;
351 }
352
LaneSizeInBits()353 unsigned LaneSizeInBits() const {
354 return LaneSizeInBytes() * 8;
355 }
356
357 private:
358 static const VRegister bregisters[];
359 static const VRegister hregisters[];
360 static const VRegister sregisters[];
361 static const VRegister dregisters[];
362 static const VRegister qregisters[];
363 static const VRegister vregisters[];
364 int lanes_;
365 };
366
367
368 // Backward compatibility for FPRegisters.
369 typedef VRegister FPRegister;
370
371 // No*Reg is used to indicate an unused argument, or an error case. Note that
372 // these all compare equal (using the Is() method). The Register and VRegister
373 // variants are provided for convenience.
374 const Register NoReg;
375 const VRegister NoVReg;
376 const FPRegister NoFPReg; // For backward compatibility.
377 const CPURegister NoCPUReg;
378
379
380 #define DEFINE_REGISTERS(N) \
381 const Register w##N(N, kWRegSize); \
382 const Register x##N(N, kXRegSize);
383 REGISTER_CODE_LIST(DEFINE_REGISTERS)
384 #undef DEFINE_REGISTERS
385 const Register wsp(kSPRegInternalCode, kWRegSize);
386 const Register sp(kSPRegInternalCode, kXRegSize);
387
388
389 #define DEFINE_VREGISTERS(N) \
390 const VRegister b##N(N, kBRegSize); \
391 const VRegister h##N(N, kHRegSize); \
392 const VRegister s##N(N, kSRegSize); \
393 const VRegister d##N(N, kDRegSize); \
394 const VRegister q##N(N, kQRegSize); \
395 const VRegister v##N(N, kQRegSize);
396 REGISTER_CODE_LIST(DEFINE_VREGISTERS)
397 #undef DEFINE_VREGISTERS
398
399
400 // Registers aliases.
401 const Register ip0 = x16;
402 const Register ip1 = x17;
403 const Register lr = x30;
404 const Register xzr = x31;
405 const Register wzr = w31;
406
407
408 // AreAliased returns true if any of the named registers overlap. Arguments
409 // set to NoReg are ignored. The system stack pointer may be specified.
410 bool AreAliased(const CPURegister& reg1,
411 const CPURegister& reg2,
412 const CPURegister& reg3 = NoReg,
413 const CPURegister& reg4 = NoReg,
414 const CPURegister& reg5 = NoReg,
415 const CPURegister& reg6 = NoReg,
416 const CPURegister& reg7 = NoReg,
417 const CPURegister& reg8 = NoReg);
418
419
420 // AreSameSizeAndType returns true if all of the specified registers have the
421 // same size, and are of the same type. The system stack pointer may be
422 // specified. Arguments set to NoReg are ignored, as are any subsequent
423 // arguments. At least one argument (reg1) must be valid (not NoCPUReg).
424 bool AreSameSizeAndType(const CPURegister& reg1,
425 const CPURegister& reg2,
426 const CPURegister& reg3 = NoCPUReg,
427 const CPURegister& reg4 = NoCPUReg,
428 const CPURegister& reg5 = NoCPUReg,
429 const CPURegister& reg6 = NoCPUReg,
430 const CPURegister& reg7 = NoCPUReg,
431 const CPURegister& reg8 = NoCPUReg);
432
433
434 // AreSameFormat returns true if all of the specified VRegisters have the same
435 // vector format. Arguments set to NoReg are ignored, as are any subsequent
436 // arguments. At least one argument (reg1) must be valid (not NoVReg).
437 bool AreSameFormat(const VRegister& reg1,
438 const VRegister& reg2,
439 const VRegister& reg3 = NoVReg,
440 const VRegister& reg4 = NoVReg);
441
442
443 // AreConsecutive returns true if all of the specified VRegisters are
444 // consecutive in the register file. Arguments set to NoReg are ignored, as are
445 // any subsequent arguments. At least one argument (reg1) must be valid
446 // (not NoVReg).
447 bool AreConsecutive(const VRegister& reg1,
448 const VRegister& reg2,
449 const VRegister& reg3 = NoVReg,
450 const VRegister& reg4 = NoVReg);
451
452
453 // Lists of registers.
454 class CPURegList {
455 public:
456 explicit CPURegList(CPURegister reg1,
457 CPURegister reg2 = NoCPUReg,
458 CPURegister reg3 = NoCPUReg,
459 CPURegister reg4 = NoCPUReg)
460 : list_(reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit()),
461 size_(reg1.size()), type_(reg1.type()) {
462 VIXL_ASSERT(AreSameSizeAndType(reg1, reg2, reg3, reg4));
463 VIXL_ASSERT(IsValid());
464 }
465
CPURegList(CPURegister::RegisterType type,unsigned size,RegList list)466 CPURegList(CPURegister::RegisterType type, unsigned size, RegList list)
467 : list_(list), size_(size), type_(type) {
468 VIXL_ASSERT(IsValid());
469 }
470
CPURegList(CPURegister::RegisterType type,unsigned size,unsigned first_reg,unsigned last_reg)471 CPURegList(CPURegister::RegisterType type, unsigned size,
472 unsigned first_reg, unsigned last_reg)
473 : size_(size), type_(type) {
474 VIXL_ASSERT(((type == CPURegister::kRegister) &&
475 (last_reg < kNumberOfRegisters)) ||
476 ((type == CPURegister::kVRegister) &&
477 (last_reg < kNumberOfVRegisters)));
478 VIXL_ASSERT(last_reg >= first_reg);
479 list_ = (UINT64_C(1) << (last_reg + 1)) - 1;
480 list_ &= ~((UINT64_C(1) << first_reg) - 1);
481 VIXL_ASSERT(IsValid());
482 }
483
type()484 CPURegister::RegisterType type() const {
485 VIXL_ASSERT(IsValid());
486 return type_;
487 }
488
489 // Combine another CPURegList into this one. Registers that already exist in
490 // this list are left unchanged. The type and size of the registers in the
491 // 'other' list must match those in this list.
Combine(const CPURegList & other)492 void Combine(const CPURegList& other) {
493 VIXL_ASSERT(IsValid());
494 VIXL_ASSERT(other.type() == type_);
495 VIXL_ASSERT(other.RegisterSizeInBits() == size_);
496 list_ |= other.list();
497 }
498
499 // Remove every register in the other CPURegList from this one. Registers that
500 // do not exist in this list are ignored. The type and size of the registers
501 // in the 'other' list must match those in this list.
Remove(const CPURegList & other)502 void Remove(const CPURegList& other) {
503 VIXL_ASSERT(IsValid());
504 VIXL_ASSERT(other.type() == type_);
505 VIXL_ASSERT(other.RegisterSizeInBits() == size_);
506 list_ &= ~other.list();
507 }
508
509 // Variants of Combine and Remove which take a single register.
Combine(const CPURegister & other)510 void Combine(const CPURegister& other) {
511 VIXL_ASSERT(other.type() == type_);
512 VIXL_ASSERT(other.size() == size_);
513 Combine(other.code());
514 }
515
Remove(const CPURegister & other)516 void Remove(const CPURegister& other) {
517 VIXL_ASSERT(other.type() == type_);
518 VIXL_ASSERT(other.size() == size_);
519 Remove(other.code());
520 }
521
522 // Variants of Combine and Remove which take a single register by its code;
523 // the type and size of the register is inferred from this list.
Combine(int code)524 void Combine(int code) {
525 VIXL_ASSERT(IsValid());
526 VIXL_ASSERT(CPURegister(code, size_, type_).IsValid());
527 list_ |= (UINT64_C(1) << code);
528 }
529
Remove(int code)530 void Remove(int code) {
531 VIXL_ASSERT(IsValid());
532 VIXL_ASSERT(CPURegister(code, size_, type_).IsValid());
533 list_ &= ~(UINT64_C(1) << code);
534 }
535
Union(const CPURegList & list_1,const CPURegList & list_2)536 static CPURegList Union(const CPURegList& list_1, const CPURegList& list_2) {
537 VIXL_ASSERT(list_1.type_ == list_2.type_);
538 VIXL_ASSERT(list_1.size_ == list_2.size_);
539 return CPURegList(list_1.type_, list_1.size_, list_1.list_ | list_2.list_);
540 }
541 static CPURegList Union(const CPURegList& list_1,
542 const CPURegList& list_2,
543 const CPURegList& list_3);
544 static CPURegList Union(const CPURegList& list_1,
545 const CPURegList& list_2,
546 const CPURegList& list_3,
547 const CPURegList& list_4);
548
Intersection(const CPURegList & list_1,const CPURegList & list_2)549 static CPURegList Intersection(const CPURegList& list_1,
550 const CPURegList& list_2) {
551 VIXL_ASSERT(list_1.type_ == list_2.type_);
552 VIXL_ASSERT(list_1.size_ == list_2.size_);
553 return CPURegList(list_1.type_, list_1.size_, list_1.list_ & list_2.list_);
554 }
555 static CPURegList Intersection(const CPURegList& list_1,
556 const CPURegList& list_2,
557 const CPURegList& list_3);
558 static CPURegList Intersection(const CPURegList& list_1,
559 const CPURegList& list_2,
560 const CPURegList& list_3,
561 const CPURegList& list_4);
562
Overlaps(const CPURegList & other)563 bool Overlaps(const CPURegList& other) const {
564 return (type_ == other.type_) && ((list_ & other.list_) != 0);
565 }
566
list()567 RegList list() const {
568 VIXL_ASSERT(IsValid());
569 return list_;
570 }
571
set_list(RegList new_list)572 void set_list(RegList new_list) {
573 VIXL_ASSERT(IsValid());
574 list_ = new_list;
575 }
576
577 // Remove all callee-saved registers from the list. This can be useful when
578 // preparing registers for an AAPCS64 function call, for example.
579 void RemoveCalleeSaved();
580
581 CPURegister PopLowestIndex();
582 CPURegister PopHighestIndex();
583
584 // AAPCS64 callee-saved registers.
585 static CPURegList GetCalleeSaved(unsigned size = kXRegSize);
586 static CPURegList GetCalleeSavedV(unsigned size = kDRegSize);
587
588 // AAPCS64 caller-saved registers. Note that this includes lr.
589 // TODO(all): Determine how we handle d8-d15 being callee-saved, but the top
590 // 64-bits being caller-saved.
591 static CPURegList GetCallerSaved(unsigned size = kXRegSize);
592 static CPURegList GetCallerSavedV(unsigned size = kDRegSize);
593
IsEmpty()594 bool IsEmpty() const {
595 VIXL_ASSERT(IsValid());
596 return list_ == 0;
597 }
598
IncludesAliasOf(const CPURegister & other)599 bool IncludesAliasOf(const CPURegister& other) const {
600 VIXL_ASSERT(IsValid());
601 return (type_ == other.type()) && ((other.Bit() & list_) != 0);
602 }
603
IncludesAliasOf(int code)604 bool IncludesAliasOf(int code) const {
605 VIXL_ASSERT(IsValid());
606 return ((code & list_) != 0);
607 }
608
Count()609 int Count() const {
610 VIXL_ASSERT(IsValid());
611 return CountSetBits(list_);
612 }
613
RegisterSizeInBits()614 unsigned RegisterSizeInBits() const {
615 VIXL_ASSERT(IsValid());
616 return size_;
617 }
618
RegisterSizeInBytes()619 unsigned RegisterSizeInBytes() const {
620 int size_in_bits = RegisterSizeInBits();
621 VIXL_ASSERT((size_in_bits % 8) == 0);
622 return size_in_bits / 8;
623 }
624
TotalSizeInBytes()625 unsigned TotalSizeInBytes() const {
626 VIXL_ASSERT(IsValid());
627 return RegisterSizeInBytes() * Count();
628 }
629
630 private:
631 RegList list_;
632 unsigned size_;
633 CPURegister::RegisterType type_;
634
635 bool IsValid() const;
636 };
637
638
639 // AAPCS64 callee-saved registers.
640 extern const CPURegList kCalleeSaved;
641 extern const CPURegList kCalleeSavedV;
642
643
644 // AAPCS64 caller-saved registers. Note that this includes lr.
645 extern const CPURegList kCallerSaved;
646 extern const CPURegList kCallerSavedV;
647
648
649 // Operand.
650 class Operand {
651 public:
652 // #<immediate>
653 // where <immediate> is int64_t.
654 // This is allowed to be an implicit constructor because Operand is
655 // a wrapper class that doesn't normally perform any type conversion.
656 Operand(int64_t immediate = 0); // NOLINT(runtime/explicit)
657
658 // rm, {<shift> #<shift_amount>}
659 // where <shift> is one of {LSL, LSR, ASR, ROR}.
660 // <shift_amount> is uint6_t.
661 // This is allowed to be an implicit constructor because Operand is
662 // a wrapper class that doesn't normally perform any type conversion.
663 Operand(Register reg,
664 Shift shift = LSL,
665 unsigned shift_amount = 0); // NOLINT(runtime/explicit)
666
667 // rm, {<extend> {#<shift_amount>}}
668 // where <extend> is one of {UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX}.
669 // <shift_amount> is uint2_t.
670 explicit Operand(Register reg, Extend extend, unsigned shift_amount = 0);
671
672 bool IsImmediate() const;
673 bool IsShiftedRegister() const;
674 bool IsExtendedRegister() const;
675 bool IsZero() const;
676
677 // This returns an LSL shift (<= 4) operand as an equivalent extend operand,
678 // which helps in the encoding of instructions that use the stack pointer.
679 Operand ToExtendedRegister() const;
680
immediate()681 int64_t immediate() const {
682 VIXL_ASSERT(IsImmediate());
683 return immediate_;
684 }
685
reg()686 Register reg() const {
687 VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister());
688 return reg_;
689 }
690
shift()691 Shift shift() const {
692 VIXL_ASSERT(IsShiftedRegister());
693 return shift_;
694 }
695
extend()696 Extend extend() const {
697 VIXL_ASSERT(IsExtendedRegister());
698 return extend_;
699 }
700
shift_amount()701 unsigned shift_amount() const {
702 VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister());
703 return shift_amount_;
704 }
705
706 private:
707 int64_t immediate_;
708 Register reg_;
709 Shift shift_;
710 Extend extend_;
711 unsigned shift_amount_;
712 };
713
714
715 // MemOperand represents the addressing mode of a load or store instruction.
716 class MemOperand {
717 public:
718 explicit MemOperand(Register base,
719 int64_t offset = 0,
720 AddrMode addrmode = Offset);
721 MemOperand(Register base,
722 Register regoffset,
723 Shift shift = LSL,
724 unsigned shift_amount = 0);
725 MemOperand(Register base,
726 Register regoffset,
727 Extend extend,
728 unsigned shift_amount = 0);
729 MemOperand(Register base,
730 const Operand& offset,
731 AddrMode addrmode = Offset);
732
base()733 const Register& base() const { return base_; }
regoffset()734 const Register& regoffset() const { return regoffset_; }
offset()735 int64_t offset() const { return offset_; }
addrmode()736 AddrMode addrmode() const { return addrmode_; }
shift()737 Shift shift() const { return shift_; }
extend()738 Extend extend() const { return extend_; }
shift_amount()739 unsigned shift_amount() const { return shift_amount_; }
740 bool IsImmediateOffset() const;
741 bool IsRegisterOffset() const;
742 bool IsPreIndex() const;
743 bool IsPostIndex() const;
744
745 void AddOffset(int64_t offset);
746
747 private:
748 Register base_;
749 Register regoffset_;
750 int64_t offset_;
751 AddrMode addrmode_;
752 Shift shift_;
753 Extend extend_;
754 unsigned shift_amount_;
755 };
756
757
758 class LabelTestHelper; // Forward declaration.
759
760
761 class Label {
762 public:
Label()763 Label() : location_(kLocationUnbound) {}
~Label()764 ~Label() {
765 // If the label has been linked to, it needs to be bound to a target.
766 VIXL_ASSERT(!IsLinked() || IsBound());
767 }
768
IsBound()769 bool IsBound() const { return location_ >= 0; }
IsLinked()770 bool IsLinked() const { return !links_.empty(); }
771
location()772 ptrdiff_t location() const { return location_; }
773
774 static const int kNPreallocatedLinks = 4;
775 static const ptrdiff_t kInvalidLinkKey = PTRDIFF_MAX;
776 static const size_t kReclaimFrom = 512;
777 static const size_t kReclaimFactor = 2;
778
779 typedef InvalSet<ptrdiff_t,
780 kNPreallocatedLinks,
781 ptrdiff_t,
782 kInvalidLinkKey,
783 kReclaimFrom,
784 kReclaimFactor> LinksSetBase;
785 typedef InvalSetIterator<LinksSetBase> LabelLinksIteratorBase;
786
787 private:
788 class LinksSet : public LinksSetBase {
789 public:
LinksSet()790 LinksSet() : LinksSetBase() {}
791 };
792
793 // Allows iterating over the links of a label. The behaviour is undefined if
794 // the list of links is modified in any way while iterating.
795 class LabelLinksIterator : public LabelLinksIteratorBase {
796 public:
LabelLinksIterator(Label * label)797 explicit LabelLinksIterator(Label* label)
798 : LabelLinksIteratorBase(&label->links_) {}
799 };
800
Bind(ptrdiff_t location)801 void Bind(ptrdiff_t location) {
802 // Labels can only be bound once.
803 VIXL_ASSERT(!IsBound());
804 location_ = location;
805 }
806
AddLink(ptrdiff_t instruction)807 void AddLink(ptrdiff_t instruction) {
808 // If a label is bound, the assembler already has the information it needs
809 // to write the instruction, so there is no need to add it to links_.
810 VIXL_ASSERT(!IsBound());
811 links_.insert(instruction);
812 }
813
DeleteLink(ptrdiff_t instruction)814 void DeleteLink(ptrdiff_t instruction) {
815 links_.erase(instruction);
816 }
817
ClearAllLinks()818 void ClearAllLinks() {
819 links_.clear();
820 }
821
822 // TODO: The comment below considers average case complexity for our
823 // usual use-cases. The elements of interest are:
824 // - Branches to a label are emitted in order: branch instructions to a label
825 // are generated at an offset in the code generation buffer greater than any
826 // other branch to that same label already generated. As an example, this can
827 // be broken when an instruction is patched to become a branch. Note that the
828 // code will still work, but the complexity considerations below may locally
829 // not apply any more.
830 // - Veneers are generated in order: for multiple branches of the same type
831 // branching to the same unbound label going out of range, veneers are
832 // generated in growing order of the branch instruction offset from the start
833 // of the buffer.
834 //
835 // When creating a veneer for a branch going out of range, the link for this
836 // branch needs to be removed from this `links_`. Since all branches are
837 // tracked in one underlying InvalSet, the complexity for this deletion is the
838 // same as for finding the element, ie. O(n), where n is the number of links
839 // in the set.
840 // This could be reduced to O(1) by using the same trick as used when tracking
841 // branch information for veneers: split the container to use one set per type
842 // of branch. With that setup, when a veneer is created and the link needs to
843 // be deleted, if the two points above hold, it must be the minimum element of
844 // the set for its type of branch, and that minimum element will be accessible
845 // in O(1).
846
847 // The offsets of the instructions that have linked to this label.
848 LinksSet links_;
849 // The label location.
850 ptrdiff_t location_;
851
852 static const ptrdiff_t kLocationUnbound = -1;
853
854 // It is not safe to copy labels, so disable the copy constructor and operator
855 // by declaring them private (without an implementation).
856 Label(const Label&);
857 void operator=(const Label&);
858
859 // The Assembler class is responsible for binding and linking labels, since
860 // the stored offsets need to be consistent with the Assembler's buffer.
861 friend class Assembler;
862 // The MacroAssembler and VeneerPool handle resolution of branches to distant
863 // targets.
864 friend class MacroAssembler;
865 friend class VeneerPool;
866 };
867
868
869 // Required InvalSet template specialisations.
870 #define INVAL_SET_TEMPLATE_PARAMETERS \
871 ptrdiff_t, \
872 Label::kNPreallocatedLinks, \
873 ptrdiff_t, \
874 Label::kInvalidLinkKey, \
875 Label::kReclaimFrom, \
876 Label::kReclaimFactor
877 template<>
Key(const ptrdiff_t & element)878 inline ptrdiff_t InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::Key(
879 const ptrdiff_t& element) {
880 return element;
881 }
882 template<>
SetKey(ptrdiff_t * element,ptrdiff_t key)883 inline void InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::SetKey(
884 ptrdiff_t* element, ptrdiff_t key) {
885 *element = key;
886 }
887 #undef INVAL_SET_TEMPLATE_PARAMETERS
888
889
// Forward declarations of classes referred to by the declarations below.
class Assembler;
class LiteralPool;
892
893 // A literal is a 32-bit or 64-bit piece of data stored in the instruction
894 // stream and loaded through a pc relative load. The same literal can be
895 // referred to by multiple instructions but a literal can only reside at one
896 // place in memory. A literal can be used by a load before or after being
897 // placed in memory.
898 //
// Internally an offset of 0 is associated with a literal which has been
// neither used nor placed. Then two possibilities arise:
//  1) the literal is placed, the offset (stored as offset + 1) is used to
//     resolve any subsequent load using the literal.
//  2) the literal is not placed and offset is the offset of the last load
//     using the literal (stored as -offset -1). If multiple loads refer to
//     this literal then the last load holds the offset of the preceding load
//     and all loads form a chain. Once the literal is placed all the loads in
//     the chain are resolved and future loads fall back to possibility 1.
908 class RawLiteral {
909 public:
910 enum DeletionPolicy {
911 kDeletedOnPlacementByPool,
912 kDeletedOnPoolDestruction,
913 kManuallyDeleted
914 };
915
916 RawLiteral(size_t size,
917 LiteralPool* literal_pool,
918 DeletionPolicy deletion_policy = kManuallyDeleted);
919
920 // The literal pool only sees and deletes `RawLiteral*` pointers, but they are
921 // actually pointing to `Literal<T>` objects.
~RawLiteral()922 virtual ~RawLiteral() {}
923
size()924 size_t size() {
925 VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
926 VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
927 VIXL_ASSERT((size_ == kXRegSizeInBytes) ||
928 (size_ == kWRegSizeInBytes) ||
929 (size_ == kQRegSizeInBytes));
930 return size_;
931 }
raw_value128_low64()932 uint64_t raw_value128_low64() {
933 VIXL_ASSERT(size_ == kQRegSizeInBytes);
934 return low64_;
935 }
raw_value128_high64()936 uint64_t raw_value128_high64() {
937 VIXL_ASSERT(size_ == kQRegSizeInBytes);
938 return high64_;
939 }
raw_value64()940 uint64_t raw_value64() {
941 VIXL_ASSERT(size_ == kXRegSizeInBytes);
942 VIXL_ASSERT(high64_ == 0);
943 return low64_;
944 }
raw_value32()945 uint32_t raw_value32() {
946 VIXL_ASSERT(size_ == kWRegSizeInBytes);
947 VIXL_ASSERT(high64_ == 0);
948 VIXL_ASSERT(is_uint32(low64_) || is_int32(low64_));
949 return static_cast<uint32_t>(low64_);
950 }
IsUsed()951 bool IsUsed() { return offset_ < 0; }
IsPlaced()952 bool IsPlaced() { return offset_ > 0; }
953
GetLiteralPool()954 LiteralPool* GetLiteralPool() const {
955 return literal_pool_;
956 }
957
offset()958 ptrdiff_t offset() {
959 VIXL_ASSERT(IsPlaced());
960 return offset_ - 1;
961 }
962
963 protected:
set_offset(ptrdiff_t offset)964 void set_offset(ptrdiff_t offset) {
965 VIXL_ASSERT(offset >= 0);
966 VIXL_ASSERT(IsWordAligned(offset));
967 VIXL_ASSERT(!IsPlaced());
968 offset_ = offset + 1;
969 }
last_use()970 ptrdiff_t last_use() {
971 VIXL_ASSERT(IsUsed());
972 return -offset_ - 1;
973 }
set_last_use(ptrdiff_t offset)974 void set_last_use(ptrdiff_t offset) {
975 VIXL_ASSERT(offset >= 0);
976 VIXL_ASSERT(IsWordAligned(offset));
977 VIXL_ASSERT(!IsPlaced());
978 offset_ = -offset - 1;
979 }
980
981 size_t size_;
982 ptrdiff_t offset_;
983 uint64_t low64_;
984 uint64_t high64_;
985
986 private:
987 LiteralPool* literal_pool_;
988 DeletionPolicy deletion_policy_;
989
990 friend class Assembler;
991 friend class LiteralPool;
992 };
993
994
995 template <typename T>
996 class Literal : public RawLiteral {
997 public:
998 explicit Literal(T value,
999 LiteralPool* literal_pool = NULL,
1000 RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
RawLiteral(sizeof (value),literal_pool,ownership)1001 : RawLiteral(sizeof(value), literal_pool, ownership) {
1002 VIXL_STATIC_ASSERT(sizeof(value) <= kXRegSizeInBytes);
1003 UpdateValue(value);
1004 }
1005
1006 Literal(T high64, T low64,
1007 LiteralPool* literal_pool = NULL,
1008 RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
RawLiteral(kQRegSizeInBytes,literal_pool,ownership)1009 : RawLiteral(kQRegSizeInBytes, literal_pool, ownership) {
1010 VIXL_STATIC_ASSERT(sizeof(low64) == (kQRegSizeInBytes / 2));
1011 UpdateValue(high64, low64);
1012 }
1013
~Literal()1014 virtual ~Literal() {}
1015
1016 // Update the value of this literal, if necessary by rewriting the value in
1017 // the pool.
1018 // If the literal has already been placed in a literal pool, the address of
1019 // the start of the code buffer must be provided, as the literal only knows it
1020 // offset from there. This also allows patching the value after the code has
1021 // been moved in memory.
1022 void UpdateValue(T new_value, uint8_t* code_buffer = NULL) {
1023 VIXL_ASSERT(sizeof(new_value) == size_);
1024 memcpy(&low64_, &new_value, sizeof(new_value));
1025 if (IsPlaced()) {
1026 VIXL_ASSERT(code_buffer != NULL);
1027 RewriteValueInCode(code_buffer);
1028 }
1029 }
1030
1031 void UpdateValue(T high64, T low64, uint8_t* code_buffer = NULL) {
1032 VIXL_ASSERT(sizeof(low64) == size_ / 2);
1033 memcpy(&low64_, &low64, sizeof(low64));
1034 memcpy(&high64_, &high64, sizeof(high64));
1035 if (IsPlaced()) {
1036 VIXL_ASSERT(code_buffer != NULL);
1037 RewriteValueInCode(code_buffer);
1038 }
1039 }
1040
1041 void UpdateValue(T new_value, const Assembler* assembler);
1042 void UpdateValue(T high64, T low64, const Assembler* assembler);
1043
1044 private:
RewriteValueInCode(uint8_t * code_buffer)1045 void RewriteValueInCode(uint8_t* code_buffer) {
1046 VIXL_ASSERT(IsPlaced());
1047 VIXL_STATIC_ASSERT(sizeof(T) <= kXRegSizeInBytes);
1048 switch (size()) {
1049 case kSRegSizeInBytes:
1050 *reinterpret_cast<uint32_t*>(code_buffer + offset()) = raw_value32();
1051 break;
1052 case kDRegSizeInBytes:
1053 *reinterpret_cast<uint64_t*>(code_buffer + offset()) = raw_value64();
1054 break;
1055 default:
1056 VIXL_ASSERT(size() == kQRegSizeInBytes);
1057 uint64_t* base_address =
1058 reinterpret_cast<uint64_t*>(code_buffer + offset());
1059 *base_address = raw_value128_low64();
1060 *(base_address + 1) = raw_value128_high64();
1061 }
1062 }
1063 };
1064
1065
1066 // Control whether or not position-independent code should be emitted.
enum PositionIndependentCodeOption {
  // Everything emitted is position-independent: branches and references to
  // labels created with the Label class are encoded PC-relative.
  PositionIndependentCode,

  // VIXL may emit references to absolute addresses. The code buffer can then
  // no longer be copied and executed from another address, so the code has to
  // be generated where it will finally run.
  PositionDependentCode,

  // VIXL may rely on the bottom 12 bits of an address staying constant while
  // the top 48 bits are free to change. This keeps `adrp` usable on systems
  // that copy code between pages but preserve 4KB page alignment.
  PageOffsetDependentCode
};
1084
1085
1086 // Control how scaled- and unscaled-offset loads and stores are generated.
enum LoadStoreScalingOption {
  // Use a scaled-immediate-offset instruction when possible; otherwise fall
  // back to an unscaled-offset, register-offset, pre-index or post-index form.
  PreferScaledOffset,

  // Use an unscaled-immediate-offset instruction when possible; otherwise fall
  // back to a scaled-offset, register-offset, pre-index or post-index form.
  PreferUnscaledOffset,

  // Only a scaled-immediate-offset instruction is acceptable.
  RequireScaledOffset,

  // Only an unscaled-immediate-offset instruction is acceptable.
  RequireUnscaledOffset
};
1102
1103
1104 // Assembler.
1105 class Assembler {
1106 public:
1107 Assembler(size_t capacity,
1108 PositionIndependentCodeOption pic = PositionIndependentCode);
1109 Assembler(byte* buffer, size_t capacity,
1110 PositionIndependentCodeOption pic = PositionIndependentCode);
1111
1112 // The destructor asserts that one of the following is true:
1113 // * The Assembler object has not been used.
1114 // * Nothing has been emitted since the last Reset() call.
1115 // * Nothing has been emitted since the last FinalizeCode() call.
1116 ~Assembler();
1117
1118 // System functions.
1119
1120 // Start generating code from the beginning of the buffer, discarding any code
1121 // and data that has already been emitted into the buffer.
1122 void Reset();
1123
1124 // Finalize a code buffer of generated instructions. This function must be
1125 // called before executing or copying code from the buffer.
1126 void FinalizeCode();
1127
1128 // Label.
1129 // Bind a label to the current PC.
1130 void bind(Label* label);
1131
1132 // Bind a label to a specified offset from the start of the buffer.
1133 void BindToOffset(Label* label, ptrdiff_t offset);
1134
1135 // Place a literal at the current PC.
1136 void place(RawLiteral* literal);
1137
CursorOffset()1138 ptrdiff_t CursorOffset() const {
1139 return buffer_->CursorOffset();
1140 }
1141
BufferEndOffset()1142 ptrdiff_t BufferEndOffset() const {
1143 return static_cast<ptrdiff_t>(buffer_->capacity());
1144 }
1145
1146 // Return the address of an offset in the buffer.
1147 template <typename T>
GetOffsetAddress(ptrdiff_t offset)1148 T GetOffsetAddress(ptrdiff_t offset) const {
1149 VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
1150 return buffer_->GetOffsetAddress<T>(offset);
1151 }
1152
1153 // Return the address of a bound label.
1154 template <typename T>
GetLabelAddress(const Label * label)1155 T GetLabelAddress(const Label * label) const {
1156 VIXL_ASSERT(label->IsBound());
1157 VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
1158 return GetOffsetAddress<T>(label->location());
1159 }
1160
1161 // Return the address of the cursor.
1162 template <typename T>
GetCursorAddress()1163 T GetCursorAddress() const {
1164 VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
1165 return GetOffsetAddress<T>(CursorOffset());
1166 }
1167
1168 // Return the address of the start of the buffer.
1169 template <typename T>
GetStartAddress()1170 T GetStartAddress() const {
1171 VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
1172 return GetOffsetAddress<T>(0);
1173 }
1174
InstructionAt(ptrdiff_t instruction_offset)1175 Instruction* InstructionAt(ptrdiff_t instruction_offset) {
1176 return GetOffsetAddress<Instruction*>(instruction_offset);
1177 }
1178
InstructionOffset(Instruction * instruction)1179 ptrdiff_t InstructionOffset(Instruction* instruction) {
1180 VIXL_STATIC_ASSERT(sizeof(*instruction) == 1);
1181 ptrdiff_t offset = instruction - GetStartAddress<Instruction*>();
1182 VIXL_ASSERT((0 <= offset) &&
1183 (offset < static_cast<ptrdiff_t>(BufferCapacity())));
1184 return offset;
1185 }
1186
1187 // Instruction set functions.
1188
1189 // Branch / Jump instructions.
1190 // Branch to register.
1191 void br(const Register& xn);
1192
1193 // Branch with link to register.
1194 void blr(const Register& xn);
1195
1196 // Branch to register with return hint.
1197 void ret(const Register& xn = lr);
1198
1199 // Unconditional branch to label.
1200 void b(Label* label);
1201
1202 // Conditional branch to label.
1203 void b(Label* label, Condition cond);
1204
1205 // Unconditional branch to PC offset.
1206 void b(int imm26);
1207
1208 // Conditional branch to PC offset.
1209 void b(int imm19, Condition cond);
1210
1211 // Branch with link to label.
1212 void bl(Label* label);
1213
1214 // Branch with link to PC offset.
1215 void bl(int imm26);
1216
1217 // Compare and branch to label if zero.
1218 void cbz(const Register& rt, Label* label);
1219
1220 // Compare and branch to PC offset if zero.
1221 void cbz(const Register& rt, int imm19);
1222
1223 // Compare and branch to label if not zero.
1224 void cbnz(const Register& rt, Label* label);
1225
1226 // Compare and branch to PC offset if not zero.
1227 void cbnz(const Register& rt, int imm19);
1228
1229 // Table lookup from one register.
1230 void tbl(const VRegister& vd,
1231 const VRegister& vn,
1232 const VRegister& vm);
1233
1234 // Table lookup from two registers.
1235 void tbl(const VRegister& vd,
1236 const VRegister& vn,
1237 const VRegister& vn2,
1238 const VRegister& vm);
1239
1240 // Table lookup from three registers.
1241 void tbl(const VRegister& vd,
1242 const VRegister& vn,
1243 const VRegister& vn2,
1244 const VRegister& vn3,
1245 const VRegister& vm);
1246
1247 // Table lookup from four registers.
1248 void tbl(const VRegister& vd,
1249 const VRegister& vn,
1250 const VRegister& vn2,
1251 const VRegister& vn3,
1252 const VRegister& vn4,
1253 const VRegister& vm);
1254
1255 // Table lookup extension from one register.
1256 void tbx(const VRegister& vd,
1257 const VRegister& vn,
1258 const VRegister& vm);
1259
1260 // Table lookup extension from two registers.
1261 void tbx(const VRegister& vd,
1262 const VRegister& vn,
1263 const VRegister& vn2,
1264 const VRegister& vm);
1265
1266 // Table lookup extension from three registers.
1267 void tbx(const VRegister& vd,
1268 const VRegister& vn,
1269 const VRegister& vn2,
1270 const VRegister& vn3,
1271 const VRegister& vm);
1272
1273 // Table lookup extension from four registers.
1274 void tbx(const VRegister& vd,
1275 const VRegister& vn,
1276 const VRegister& vn2,
1277 const VRegister& vn3,
1278 const VRegister& vn4,
1279 const VRegister& vm);
1280
1281 // Test bit and branch to label if zero.
1282 void tbz(const Register& rt, unsigned bit_pos, Label* label);
1283
1284 // Test bit and branch to PC offset if zero.
1285 void tbz(const Register& rt, unsigned bit_pos, int imm14);
1286
1287 // Test bit and branch to label if not zero.
1288 void tbnz(const Register& rt, unsigned bit_pos, Label* label);
1289
1290 // Test bit and branch to PC offset if not zero.
1291 void tbnz(const Register& rt, unsigned bit_pos, int imm14);
1292
1293 // Address calculation instructions.
1294 // Calculate a PC-relative address. Unlike for branches the offset in adr is
1295 // unscaled (i.e. the result can be unaligned).
1296
1297 // Calculate the address of a label.
1298 void adr(const Register& rd, Label* label);
1299
1300 // Calculate the address of a PC offset.
1301 void adr(const Register& rd, int imm21);
1302
1303 // Calculate the page address of a label.
1304 void adrp(const Register& rd, Label* label);
1305
1306 // Calculate the page address of a PC offset.
1307 void adrp(const Register& rd, int imm21);
1308
1309 // Data Processing instructions.
1310 // Add.
1311 void add(const Register& rd,
1312 const Register& rn,
1313 const Operand& operand);
1314
1315 // Add and update status flags.
1316 void adds(const Register& rd,
1317 const Register& rn,
1318 const Operand& operand);
1319
1320 // Compare negative.
1321 void cmn(const Register& rn, const Operand& operand);
1322
1323 // Subtract.
1324 void sub(const Register& rd,
1325 const Register& rn,
1326 const Operand& operand);
1327
1328 // Subtract and update status flags.
1329 void subs(const Register& rd,
1330 const Register& rn,
1331 const Operand& operand);
1332
1333 // Compare.
1334 void cmp(const Register& rn, const Operand& operand);
1335
1336 // Negate.
1337 void neg(const Register& rd,
1338 const Operand& operand);
1339
1340 // Negate and update status flags.
1341 void negs(const Register& rd,
1342 const Operand& operand);
1343
1344 // Add with carry bit.
1345 void adc(const Register& rd,
1346 const Register& rn,
1347 const Operand& operand);
1348
1349 // Add with carry bit and update status flags.
1350 void adcs(const Register& rd,
1351 const Register& rn,
1352 const Operand& operand);
1353
1354 // Subtract with carry bit.
1355 void sbc(const Register& rd,
1356 const Register& rn,
1357 const Operand& operand);
1358
1359 // Subtract with carry bit and update status flags.
1360 void sbcs(const Register& rd,
1361 const Register& rn,
1362 const Operand& operand);
1363
1364 // Negate with carry bit.
1365 void ngc(const Register& rd,
1366 const Operand& operand);
1367
1368 // Negate with carry bit and update status flags.
1369 void ngcs(const Register& rd,
1370 const Operand& operand);
1371
1372 // Logical instructions.
1373 // Bitwise and (A & B).
1374 void and_(const Register& rd,
1375 const Register& rn,
1376 const Operand& operand);
1377
1378 // Bitwise and (A & B) and update status flags.
1379 void ands(const Register& rd,
1380 const Register& rn,
1381 const Operand& operand);
1382
1383 // Bit test and set flags.
1384 void tst(const Register& rn, const Operand& operand);
1385
1386 // Bit clear (A & ~B).
1387 void bic(const Register& rd,
1388 const Register& rn,
1389 const Operand& operand);
1390
1391 // Bit clear (A & ~B) and update status flags.
1392 void bics(const Register& rd,
1393 const Register& rn,
1394 const Operand& operand);
1395
1396 // Bitwise or (A | B).
1397 void orr(const Register& rd, const Register& rn, const Operand& operand);
1398
// Bitwise or-not (A | ~B).
1400 void orn(const Register& rd, const Register& rn, const Operand& operand);
1401
1402 // Bitwise eor/xor (A ^ B).
1403 void eor(const Register& rd, const Register& rn, const Operand& operand);
1404
1405 // Bitwise enor/xnor (A ^ ~B).
1406 void eon(const Register& rd, const Register& rn, const Operand& operand);
1407
1408 // Logical shift left by variable.
1409 void lslv(const Register& rd, const Register& rn, const Register& rm);
1410
1411 // Logical shift right by variable.
1412 void lsrv(const Register& rd, const Register& rn, const Register& rm);
1413
1414 // Arithmetic shift right by variable.
1415 void asrv(const Register& rd, const Register& rn, const Register& rm);
1416
1417 // Rotate right by variable.
1418 void rorv(const Register& rd, const Register& rn, const Register& rm);
1419
1420 // Bitfield instructions.
1421 // Bitfield move.
1422 void bfm(const Register& rd,
1423 const Register& rn,
1424 unsigned immr,
1425 unsigned imms);
1426
1427 // Signed bitfield move.
1428 void sbfm(const Register& rd,
1429 const Register& rn,
1430 unsigned immr,
1431 unsigned imms);
1432
1433 // Unsigned bitfield move.
1434 void ubfm(const Register& rd,
1435 const Register& rn,
1436 unsigned immr,
1437 unsigned imms);
1438
1439 // Bfm aliases.
1440 // Bitfield insert.
bfi(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1441 void bfi(const Register& rd,
1442 const Register& rn,
1443 unsigned lsb,
1444 unsigned width) {
1445 VIXL_ASSERT(width >= 1);
1446 VIXL_ASSERT(lsb + width <= rn.size());
1447 bfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
1448 }
1449
1450 // Bitfield extract and insert low.
bfxil(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1451 void bfxil(const Register& rd,
1452 const Register& rn,
1453 unsigned lsb,
1454 unsigned width) {
1455 VIXL_ASSERT(width >= 1);
1456 VIXL_ASSERT(lsb + width <= rn.size());
1457 bfm(rd, rn, lsb, lsb + width - 1);
1458 }
1459
1460 // Sbfm aliases.
1461 // Arithmetic shift right.
asr(const Register & rd,const Register & rn,unsigned shift)1462 void asr(const Register& rd, const Register& rn, unsigned shift) {
1463 VIXL_ASSERT(shift < rd.size());
1464 sbfm(rd, rn, shift, rd.size() - 1);
1465 }
1466
1467 // Signed bitfield insert with zero at right.
sbfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1468 void sbfiz(const Register& rd,
1469 const Register& rn,
1470 unsigned lsb,
1471 unsigned width) {
1472 VIXL_ASSERT(width >= 1);
1473 VIXL_ASSERT(lsb + width <= rn.size());
1474 sbfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
1475 }
1476
1477 // Signed bitfield extract.
sbfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1478 void sbfx(const Register& rd,
1479 const Register& rn,
1480 unsigned lsb,
1481 unsigned width) {
1482 VIXL_ASSERT(width >= 1);
1483 VIXL_ASSERT(lsb + width <= rn.size());
1484 sbfm(rd, rn, lsb, lsb + width - 1);
1485 }
1486
1487 // Signed extend byte.
sxtb(const Register & rd,const Register & rn)1488 void sxtb(const Register& rd, const Register& rn) {
1489 sbfm(rd, rn, 0, 7);
1490 }
1491
1492 // Signed extend halfword.
sxth(const Register & rd,const Register & rn)1493 void sxth(const Register& rd, const Register& rn) {
1494 sbfm(rd, rn, 0, 15);
1495 }
1496
1497 // Signed extend word.
sxtw(const Register & rd,const Register & rn)1498 void sxtw(const Register& rd, const Register& rn) {
1499 sbfm(rd, rn, 0, 31);
1500 }
1501
1502 // Ubfm aliases.
1503 // Logical shift left.
lsl(const Register & rd,const Register & rn,unsigned shift)1504 void lsl(const Register& rd, const Register& rn, unsigned shift) {
1505 unsigned reg_size = rd.size();
1506 VIXL_ASSERT(shift < reg_size);
1507 ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
1508 }
1509
1510 // Logical shift right.
lsr(const Register & rd,const Register & rn,unsigned shift)1511 void lsr(const Register& rd, const Register& rn, unsigned shift) {
1512 VIXL_ASSERT(shift < rd.size());
1513 ubfm(rd, rn, shift, rd.size() - 1);
1514 }
1515
1516 // Unsigned bitfield insert with zero at right.
ubfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1517 void ubfiz(const Register& rd,
1518 const Register& rn,
1519 unsigned lsb,
1520 unsigned width) {
1521 VIXL_ASSERT(width >= 1);
1522 VIXL_ASSERT(lsb + width <= rn.size());
1523 ubfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
1524 }
1525
1526 // Unsigned bitfield extract.
ubfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1527 void ubfx(const Register& rd,
1528 const Register& rn,
1529 unsigned lsb,
1530 unsigned width) {
1531 VIXL_ASSERT(width >= 1);
1532 VIXL_ASSERT(lsb + width <= rn.size());
1533 ubfm(rd, rn, lsb, lsb + width - 1);
1534 }
1535
1536 // Unsigned extend byte.
uxtb(const Register & rd,const Register & rn)1537 void uxtb(const Register& rd, const Register& rn) {
1538 ubfm(rd, rn, 0, 7);
1539 }
1540
1541 // Unsigned extend halfword.
uxth(const Register & rd,const Register & rn)1542 void uxth(const Register& rd, const Register& rn) {
1543 ubfm(rd, rn, 0, 15);
1544 }
1545
1546 // Unsigned extend word.
uxtw(const Register & rd,const Register & rn)1547 void uxtw(const Register& rd, const Register& rn) {
1548 ubfm(rd, rn, 0, 31);
1549 }
1550
1551 // Extract.
1552 void extr(const Register& rd,
1553 const Register& rn,
1554 const Register& rm,
1555 unsigned lsb);
1556
1557 // Conditional select: rd = cond ? rn : rm.
1558 void csel(const Register& rd,
1559 const Register& rn,
1560 const Register& rm,
1561 Condition cond);
1562
1563 // Conditional select increment: rd = cond ? rn : rm + 1.
1564 void csinc(const Register& rd,
1565 const Register& rn,
1566 const Register& rm,
1567 Condition cond);
1568
1569 // Conditional select inversion: rd = cond ? rn : ~rm.
1570 void csinv(const Register& rd,
1571 const Register& rn,
1572 const Register& rm,
1573 Condition cond);
1574
1575 // Conditional select negation: rd = cond ? rn : -rm.
1576 void csneg(const Register& rd,
1577 const Register& rn,
1578 const Register& rm,
1579 Condition cond);
1580
1581 // Conditional set: rd = cond ? 1 : 0.
1582 void cset(const Register& rd, Condition cond);
1583
1584 // Conditional set mask: rd = cond ? -1 : 0.
1585 void csetm(const Register& rd, Condition cond);
1586
1587 // Conditional increment: rd = cond ? rn + 1 : rn.
1588 void cinc(const Register& rd, const Register& rn, Condition cond);
1589
1590 // Conditional invert: rd = cond ? ~rn : rn.
1591 void cinv(const Register& rd, const Register& rn, Condition cond);
1592
1593 // Conditional negate: rd = cond ? -rn : rn.
1594 void cneg(const Register& rd, const Register& rn, Condition cond);
1595
1596 // Rotate right.
ror(const Register & rd,const Register & rs,unsigned shift)1597 void ror(const Register& rd, const Register& rs, unsigned shift) {
1598 extr(rd, rs, rs, shift);
1599 }
1600
1601 // Conditional comparison.
1602 // Conditional compare negative.
1603 void ccmn(const Register& rn,
1604 const Operand& operand,
1605 StatusFlags nzcv,
1606 Condition cond);
1607
1608 // Conditional compare.
1609 void ccmp(const Register& rn,
1610 const Operand& operand,
1611 StatusFlags nzcv,
1612 Condition cond);
1613
1614 // CRC-32 checksum from byte.
1615 void crc32b(const Register& rd,
1616 const Register& rn,
1617 const Register& rm);
1618
1619 // CRC-32 checksum from half-word.
1620 void crc32h(const Register& rd,
1621 const Register& rn,
1622 const Register& rm);
1623
1624 // CRC-32 checksum from word.
1625 void crc32w(const Register& rd,
1626 const Register& rn,
1627 const Register& rm);
1628
1629 // CRC-32 checksum from double word.
1630 void crc32x(const Register& rd,
1631 const Register& rn,
1632 const Register& rm);
1633
// CRC-32C checksum from byte.
1635 void crc32cb(const Register& rd,
1636 const Register& rn,
1637 const Register& rm);
1638
// CRC-32C checksum from half-word.
1640 void crc32ch(const Register& rd,
1641 const Register& rn,
1642 const Register& rm);
1643
// CRC-32C checksum from word.
1645 void crc32cw(const Register& rd,
1646 const Register& rn,
1647 const Register& rm);
1648
1649 // CRC-32C checksum from double word.
1650 void crc32cx(const Register& rd,
1651 const Register& rn,
1652 const Register& rm);
1653
1654 // Multiply.
1655 void mul(const Register& rd, const Register& rn, const Register& rm);
1656
1657 // Negated multiply.
1658 void mneg(const Register& rd, const Register& rn, const Register& rm);
1659
1660 // Signed long multiply: 32 x 32 -> 64-bit.
1661 void smull(const Register& rd, const Register& rn, const Register& rm);
1662
1663 // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
1664 void smulh(const Register& xd, const Register& xn, const Register& xm);
1665
1666 // Multiply and accumulate.
1667 void madd(const Register& rd,
1668 const Register& rn,
1669 const Register& rm,
1670 const Register& ra);
1671
1672 // Multiply and subtract.
1673 void msub(const Register& rd,
1674 const Register& rn,
1675 const Register& rm,
1676 const Register& ra);
1677
1678 // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
1679 void smaddl(const Register& rd,
1680 const Register& rn,
1681 const Register& rm,
1682 const Register& ra);
1683
1684 // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
1685 void umaddl(const Register& rd,
1686 const Register& rn,
1687 const Register& rm,
1688 const Register& ra);
1689
1690 // Unsigned long multiply: 32 x 32 -> 64-bit.
umull(const Register & rd,const Register & rn,const Register & rm)1691 void umull(const Register& rd,
1692 const Register& rn,
1693 const Register& rm) {
1694 umaddl(rd, rn, rm, xzr);
1695 }
1696
1697 // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
1698 void umulh(const Register& xd,
1699 const Register& xn,
1700 const Register& xm);
1701
1702 // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1703 void smsubl(const Register& rd,
1704 const Register& rn,
1705 const Register& rm,
1706 const Register& ra);
1707
1708 // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1709 void umsubl(const Register& rd,
1710 const Register& rn,
1711 const Register& rm,
1712 const Register& ra);
1713
1714 // Signed integer divide.
1715 void sdiv(const Register& rd, const Register& rn, const Register& rm);
1716
1717 // Unsigned integer divide.
1718 void udiv(const Register& rd, const Register& rn, const Register& rm);
1719
1720 // Bit reverse.
1721 void rbit(const Register& rd, const Register& rn);
1722
1723 // Reverse bytes in 16-bit half words.
1724 void rev16(const Register& rd, const Register& rn);
1725
1726 // Reverse bytes in 32-bit words.
1727 void rev32(const Register& rd, const Register& rn);
1728
1729 // Reverse bytes.
1730 void rev(const Register& rd, const Register& rn);
1731
1732 // Count leading zeroes.
1733 void clz(const Register& rd, const Register& rn);
1734
1735 // Count leading sign bits.
1736 void cls(const Register& rd, const Register& rn);
1737
1738 // Memory instructions.
1739 // Load integer or FP register.
1740 void ldr(const CPURegister& rt, const MemOperand& src,
1741 LoadStoreScalingOption option = PreferScaledOffset);
1742
1743 // Store integer or FP register.
1744 void str(const CPURegister& rt, const MemOperand& dst,
1745 LoadStoreScalingOption option = PreferScaledOffset);
1746
1747 // Load word with sign extension.
1748 void ldrsw(const Register& rt, const MemOperand& src,
1749 LoadStoreScalingOption option = PreferScaledOffset);
1750
1751 // Load byte.
1752 void ldrb(const Register& rt, const MemOperand& src,
1753 LoadStoreScalingOption option = PreferScaledOffset);
1754
1755 // Store byte.
1756 void strb(const Register& rt, const MemOperand& dst,
1757 LoadStoreScalingOption option = PreferScaledOffset);
1758
1759 // Load byte with sign extension.
1760 void ldrsb(const Register& rt, const MemOperand& src,
1761 LoadStoreScalingOption option = PreferScaledOffset);
1762
1763 // Load half-word.
1764 void ldrh(const Register& rt, const MemOperand& src,
1765 LoadStoreScalingOption option = PreferScaledOffset);
1766
1767 // Store half-word.
1768 void strh(const Register& rt, const MemOperand& dst,
1769 LoadStoreScalingOption option = PreferScaledOffset);
1770
1771 // Load half-word with sign extension.
1772 void ldrsh(const Register& rt, const MemOperand& src,
1773 LoadStoreScalingOption option = PreferScaledOffset);
1774
1775 // Load integer or FP register (with unscaled offset).
1776 void ldur(const CPURegister& rt, const MemOperand& src,
1777 LoadStoreScalingOption option = PreferUnscaledOffset);
1778
1779 // Store integer or FP register (with unscaled offset).
1780 void stur(const CPURegister& rt, const MemOperand& src,
1781 LoadStoreScalingOption option = PreferUnscaledOffset);
1782
// Load word with sign extension (and unscaled offset).
1784 void ldursw(const Register& rt, const MemOperand& src,
1785 LoadStoreScalingOption option = PreferUnscaledOffset);
1786
1787 // Load byte (with unscaled offset).
1788 void ldurb(const Register& rt, const MemOperand& src,
1789 LoadStoreScalingOption option = PreferUnscaledOffset);
1790
1791 // Store byte (with unscaled offset).
1792 void sturb(const Register& rt, const MemOperand& dst,
1793 LoadStoreScalingOption option = PreferUnscaledOffset);
1794
1795 // Load byte with sign extension (and unscaled offset).
1796 void ldursb(const Register& rt, const MemOperand& src,
1797 LoadStoreScalingOption option = PreferUnscaledOffset);
1798
1799 // Load half-word (with unscaled offset).
1800 void ldurh(const Register& rt, const MemOperand& src,
1801 LoadStoreScalingOption option = PreferUnscaledOffset);
1802
1803 // Store half-word (with unscaled offset).
1804 void sturh(const Register& rt, const MemOperand& dst,
1805 LoadStoreScalingOption option = PreferUnscaledOffset);
1806
1807 // Load half-word with sign extension (and unscaled offset).
1808 void ldursh(const Register& rt, const MemOperand& src,
1809 LoadStoreScalingOption option = PreferUnscaledOffset);
1810
1811 // Load integer or FP register pair.
1812 void ldp(const CPURegister& rt, const CPURegister& rt2,
1813 const MemOperand& src);
1814
1815 // Store integer or FP register pair.
1816 void stp(const CPURegister& rt, const CPURegister& rt2,
1817 const MemOperand& dst);
1818
1819 // Load word pair with sign extension.
1820 void ldpsw(const Register& rt, const Register& rt2, const MemOperand& src);
1821
1822 // Load integer or FP register pair, non-temporal.
1823 void ldnp(const CPURegister& rt, const CPURegister& rt2,
1824 const MemOperand& src);
1825
1826 // Store integer or FP register pair, non-temporal.
1827 void stnp(const CPURegister& rt, const CPURegister& rt2,
1828 const MemOperand& dst);
1829
1830 // Load integer or FP register from literal pool.
1831 void ldr(const CPURegister& rt, RawLiteral* literal);
1832
1833 // Load word with sign extension from literal pool.
1834 void ldrsw(const Register& rt, RawLiteral* literal);
1835
1836 // Load integer or FP register from pc + imm19 << 2.
1837 void ldr(const CPURegister& rt, int imm19);
1838
1839 // Load word with sign extension from pc + imm19 << 2.
1840 void ldrsw(const Register& rt, int imm19);
1841
1842 // Store exclusive byte.
1843 void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1844
1845 // Store exclusive half-word.
1846 void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1847
1848 // Store exclusive register.
1849 void stxr(const Register& rs, const Register& rt, const MemOperand& dst);
1850
1851 // Load exclusive byte.
1852 void ldxrb(const Register& rt, const MemOperand& src);
1853
1854 // Load exclusive half-word.
1855 void ldxrh(const Register& rt, const MemOperand& src);
1856
1857 // Load exclusive register.
1858 void ldxr(const Register& rt, const MemOperand& src);
1859
1860 // Store exclusive register pair.
1861 void stxp(const Register& rs,
1862 const Register& rt,
1863 const Register& rt2,
1864 const MemOperand& dst);
1865
1866 // Load exclusive register pair.
1867 void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);
1868
1869 // Store-release exclusive byte.
1870 void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1871
1872 // Store-release exclusive half-word.
1873 void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1874
1875 // Store-release exclusive register.
1876 void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);
1877
1878 // Load-acquire exclusive byte.
1879 void ldaxrb(const Register& rt, const MemOperand& src);
1880
1881 // Load-acquire exclusive half-word.
1882 void ldaxrh(const Register& rt, const MemOperand& src);
1883
1884 // Load-acquire exclusive register.
1885 void ldaxr(const Register& rt, const MemOperand& src);
1886
1887 // Store-release exclusive register pair.
1888 void stlxp(const Register& rs,
1889 const Register& rt,
1890 const Register& rt2,
1891 const MemOperand& dst);
1892
1893 // Load-acquire exclusive register pair.
1894 void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);
1895
1896 // Store-release byte.
1897 void stlrb(const Register& rt, const MemOperand& dst);
1898
1899 // Store-release half-word.
1900 void stlrh(const Register& rt, const MemOperand& dst);
1901
1902 // Store-release register.
1903 void stlr(const Register& rt, const MemOperand& dst);
1904
1905 // Load-acquire byte.
1906 void ldarb(const Register& rt, const MemOperand& src);
1907
1908 // Load-acquire half-word.
1909 void ldarh(const Register& rt, const MemOperand& src);
1910
1911 // Load-acquire register.
1912 void ldar(const Register& rt, const MemOperand& src);
1913
1914 // Prefetch memory.
1915 void prfm(PrefetchOperation op, const MemOperand& addr,
1916 LoadStoreScalingOption option = PreferScaledOffset);
1917
1918 // Prefetch memory (with unscaled offset).
1919 void prfum(PrefetchOperation op, const MemOperand& addr,
1920 LoadStoreScalingOption option = PreferUnscaledOffset);
1921
1922 // Prefetch memory in the literal pool.
1923 void prfm(PrefetchOperation op, RawLiteral* literal);
1924
1925 // Prefetch from pc + imm19 << 2.
1926 void prfm(PrefetchOperation op, int imm19);
1927
1928 // Move instructions. The default shift of -1 indicates that the move
1929 // instruction will calculate an appropriate 16-bit immediate and left shift
1930 // that is equal to the 64-bit immediate argument. If an explicit left shift
1931 // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
1932 //
1933 // For movk, an explicit shift can be used to indicate which half word should
1934 // be overwritten, e.g. movk(x0, 0, 0) will overwrite the least-significant
1935 // half word with zero, whereas movk(x0, 0, 48) will overwrite the
1936 // most-significant.
1937
1938 // Move wide with keep (MOVK); forwards rd, imm and shift to MoveWide.
1939 void movk(const Register& rd, uint64_t imm, int shift = -1) {
1940 this->MoveWide(rd, imm, shift, MOVK);
1941 }
1942
1943 // Move wide with NOT (MOVN); forwards rd, imm and shift to MoveWide.
1944 void movn(const Register& rd, uint64_t imm, int shift = -1) {
1945 this->MoveWide(rd, imm, shift, MOVN);
1946 }
1947
1948 // Move wide with zero (MOVZ); forwards rd, imm and shift to MoveWide.
1949 void movz(const Register& rd, uint64_t imm, int shift = -1) {
1950 this->MoveWide(rd, imm, shift, MOVZ);
1951 }
1952
1953 // Misc instructions.
1954 // Monitor debug-mode breakpoint.
1955 void brk(int code);
1956
1957 // Halting debug-mode breakpoint.
1958 void hlt(int code);
1959
1960 // Generate exception targeting EL1.
1961 void svc(int code);
1962
1963 // Move register to register.
1964 void mov(const Register& rd, const Register& rn);
1965
1966 // Move inverted operand to register.
1967 void mvn(const Register& rd, const Operand& operand);
1968
1969 // System instructions.
1970 // Move to register from system register.
1971 void mrs(const Register& rt, SystemRegister sysreg);
1972
1973 // Move from register to system register.
1974 void msr(SystemRegister sysreg, const Register& rt);
1975
1976 // System instruction.
1977 void sys(int op1, int crn, int crm, int op2, const Register& rt = xzr);
1978
1979 // System instruction with pre-encoded op (op1:crn:crm:op2).
1980 void sys(int op, const Register& rt = xzr);
1981
1982 // System data cache operation.
1983 void dc(DataCacheOp op, const Register& rt);
1984
1985 // System instruction cache operation.
1986 void ic(InstructionCacheOp op, const Register& rt);
1987
1988 // System hint.
1989 void hint(SystemHint code);
1990
1991 // Clear exclusive monitor.
1992 void clrex(int imm4 = 0xf);
1993
1994 // Data memory barrier.
1995 void dmb(BarrierDomain domain, BarrierType type);
1996
1997 // Data synchronization barrier.
1998 void dsb(BarrierDomain domain, BarrierType type);
1999
2000 // Instruction synchronization barrier.
2001 void isb();
2002
2003 // Alias for system instructions.
2004 // No-op: emitted as the NOP system hint.
2005 void nop() {
2006 hint(NOP);
2007 }
2008
2009 // FP and NEON instructions.
2010 // Move double precision immediate to FP register.
2011 void fmov(const VRegister& vd, double imm);
2012
2013 // Move single precision immediate to FP register.
2014 void fmov(const VRegister& vd, float imm);
2015
2016 // Move FP register to register.
2017 void fmov(const Register& rd, const VRegister& fn);
2018
2019 // Move register to FP register.
2020 void fmov(const VRegister& vd, const Register& rn);
2021
2022 // Move FP register to FP register.
2023 void fmov(const VRegister& vd, const VRegister& fn);
2024
2025 // Move 64-bit register to top half of 128-bit FP register.
2026 void fmov(const VRegister& vd, int index, const Register& rn);
2027
2028 // Move top half of 128-bit FP register to 64-bit register.
2029 void fmov(const Register& rd, const VRegister& vn, int index);
2030
2031 // FP add.
2032 void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2033
2034 // FP subtract.
2035 void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2036
2037 // FP multiply.
2038 void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2039
2040 // FP fused multiply-add.
2041 void fmadd(const VRegister& vd,
2042 const VRegister& vn,
2043 const VRegister& vm,
2044 const VRegister& va);
2045
2046 // FP fused multiply-subtract.
2047 void fmsub(const VRegister& vd,
2048 const VRegister& vn,
2049 const VRegister& vm,
2050 const VRegister& va);
2051
2052 // FP fused multiply-add and negate.
2053 void fnmadd(const VRegister& vd,
2054 const VRegister& vn,
2055 const VRegister& vm,
2056 const VRegister& va);
2057
2058 // FP fused multiply-subtract and negate.
2059 void fnmsub(const VRegister& vd,
2060 const VRegister& vn,
2061 const VRegister& vm,
2062 const VRegister& va);
2063
2064 // FP multiply-negate scalar.
2065 void fnmul(const VRegister& vd,
2066 const VRegister& vn,
2067 const VRegister& vm);
2068
2069 // FP reciprocal exponent scalar.
2070 void frecpx(const VRegister& vd,
2071 const VRegister& vn);
2072
2073 // FP divide.
2074 void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2075
2076 // FP maximum.
2077 void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2078
2079 // FP minimum.
2080 void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2081
2082 // FP maximum number.
2083 void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2084
2085 // FP minimum number.
2086 void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2087
2088 // FP absolute.
2089 void fabs(const VRegister& vd, const VRegister& vn);
2090
2091 // FP negate.
2092 void fneg(const VRegister& vd, const VRegister& vn);
2093
2094 // FP square root.
2095 void fsqrt(const VRegister& vd, const VRegister& vn);
2096
2097 // FP round to integer, nearest with ties to away.
2098 void frinta(const VRegister& vd, const VRegister& vn);
2099
2100 // FP round to integer, implicit rounding.
2101 void frinti(const VRegister& vd, const VRegister& vn);
2102
2103 // FP round to integer, toward minus infinity.
2104 void frintm(const VRegister& vd, const VRegister& vn);
2105
2106 // FP round to integer, nearest with ties to even.
2107 void frintn(const VRegister& vd, const VRegister& vn);
2108
2109 // FP round to integer, toward plus infinity.
2110 void frintp(const VRegister& vd, const VRegister& vn);
2111
2112 // FP round to integer, exact, implicit rounding.
2113 void frintx(const VRegister& vd, const VRegister& vn);
2114
2115 // FP round to integer, towards zero.
2116 void frintz(const VRegister& vd, const VRegister& vn);
2117
// Helper taking an FP register, an immediate double and a trap-flag selector.
// NOTE(review): presumably the shared emitter behind fcmp(vn, value) and
// fcmpe(vn, value), with `trap` choosing the signaling form - confirm
// against the definition.
2118 void FPCompareMacro(const VRegister& vn,
2119 double value,
2120 FPTrapFlags trap);
2121
// Helper taking two FP registers and a trap-flag selector.
// NOTE(review): presumably the shared emitter behind fcmp(vn, vm) and
// fcmpe(vn, vm), with `trap` choosing the signaling form - confirm against
// the definition.
2122 void FPCompareMacro(const VRegister& vn,
2123 const VRegister& vm,
2124 FPTrapFlags trap);
2125
2126 // FP compare registers.
2127 void fcmp(const VRegister& vn, const VRegister& vm);
2128
2129 // FP compare immediate.
2130 void fcmp(const VRegister& vn, double value);
2131
// Helper taking two FP registers, an NZCV flag value, a condition and a
// trap-flag selector.
// NOTE(review): presumably the shared emitter behind fccmp and fccmpe, with
// `trap` choosing the signaling form - confirm against the definition.
2132 void FPCCompareMacro(const VRegister& vn,
2133 const VRegister& vm,
2134 StatusFlags nzcv,
2135 Condition cond,
2136 FPTrapFlags trap);
2137
2138 // FP conditional compare.
2139 void fccmp(const VRegister& vn,
2140 const VRegister& vm,
2141 StatusFlags nzcv,
2142 Condition cond);
2143
2144 // FP signaling compare registers.
2145 void fcmpe(const VRegister& vn, const VRegister& vm);
2146
2147 // FP signaling compare immediate.
2148 void fcmpe(const VRegister& vn, double value);
2149
2150 // FP conditional signaling compare.
2151 void fccmpe(const VRegister& vn,
2152 const VRegister& vm,
2153 StatusFlags nzcv,
2154 Condition cond);
2155
2156 // FP conditional select.
2157 void fcsel(const VRegister& vd,
2158 const VRegister& vn,
2159 const VRegister& vm,
2160 Condition cond);
2161
2162 // Common FP Convert functions.
// One overload takes a general-purpose destination (rd), the other a vector
// destination (vd); both take the source register vn and an Instr `op`.
// NOTE(review): presumably `op` is the opcode supplied by the individual
// fcvt* emitters declared in this class - confirm against the definitions.
2163 void NEONFPConvertToInt(const Register& rd,
2164 const VRegister& vn,
2165 Instr op);
2166 void NEONFPConvertToInt(const VRegister& vd,
2167 const VRegister& vn,
2168 Instr op);
2169
2170 // FP convert between precisions.
2171 void fcvt(const VRegister& vd, const VRegister& vn);
2172
2173 // FP convert to higher precision.
2174 void fcvtl(const VRegister& vd, const VRegister& vn);
2175
2176 // FP convert to higher precision (second part).
2177 void fcvtl2(const VRegister& vd, const VRegister& vn);
2178
2179 // FP convert to lower precision.
2180 void fcvtn(const VRegister& vd, const VRegister& vn);
2181
2182 // FP convert to lower precision (second part).
2183 void fcvtn2(const VRegister& vd, const VRegister& vn);
2184
2185 // FP convert to lower precision, rounding to odd.
2186 void fcvtxn(const VRegister& vd, const VRegister& vn);
2187
2188 // FP convert to lower precision, rounding to odd (second part).
2189 void fcvtxn2(const VRegister& vd, const VRegister& vn);
2190
2191 // FP convert to signed integer, nearest with ties to away.
2192 void fcvtas(const Register& rd, const VRegister& vn);
2193
2194 // FP convert to unsigned integer, nearest with ties to away.
2195 void fcvtau(const Register& rd, const VRegister& vn);
2196
2197 // FP convert to signed integer, nearest with ties to away.
2198 void fcvtas(const VRegister& vd, const VRegister& vn);
2199
2200 // FP convert to unsigned integer, nearest with ties to away.
2201 void fcvtau(const VRegister& vd, const VRegister& vn);
2202
2203 // FP convert to signed integer, round towards -infinity.
2204 void fcvtms(const Register& rd, const VRegister& vn);
2205
2206 // FP convert to unsigned integer, round towards -infinity.
2207 void fcvtmu(const Register& rd, const VRegister& vn);
2208
2209 // FP convert to signed integer, round towards -infinity.
2210 void fcvtms(const VRegister& vd, const VRegister& vn);
2211
2212 // FP convert to unsigned integer, round towards -infinity.
2213 void fcvtmu(const VRegister& vd, const VRegister& vn);
2214
2215 // FP convert to signed integer, nearest with ties to even.
2216 void fcvtns(const Register& rd, const VRegister& vn);
2217
2218 // FP convert to unsigned integer, nearest with ties to even.
2219 void fcvtnu(const Register& rd, const VRegister& vn);
2220
2221 // FP convert to signed integer, nearest with ties to even.
2222 void fcvtns(const VRegister& rd, const VRegister& vn);
2223
2224 // FP convert to unsigned integer, nearest with ties to even.
2225 void fcvtnu(const VRegister& rd, const VRegister& vn);
2226
2227 // FP convert to signed integer or fixed-point, round towards zero.
2228 void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);
2229
2230 // FP convert to unsigned integer or fixed-point, round towards zero.
2231 void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);
2232
2233 // FP convert to signed integer or fixed-point, round towards zero.
2234 void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);
2235
2236 // FP convert to unsigned integer or fixed-point, round towards zero.
2237 void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);
2238
2239 // FP convert to signed integer, round towards +infinity.
2240 void fcvtps(const Register& rd, const VRegister& vn);
2241
2242 // FP convert to unsigned integer, round towards +infinity.
2243 void fcvtpu(const Register& rd, const VRegister& vn);
2244
2245 // FP convert to signed integer, round towards +infinity.
2246 void fcvtps(const VRegister& vd, const VRegister& vn);
2247
2248 // FP convert to unsigned integer, round towards +infinity.
2249 void fcvtpu(const VRegister& vd, const VRegister& vn);
2250
2251 // Convert signed integer or fixed-point to FP.
2252 void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2253
2254 // Convert unsigned integer or fixed-point to FP.
2255 void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2256
2257 // Convert signed integer or fixed-point to FP.
2258 void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2259
2260 // Convert unsigned integer or fixed-point to FP.
2261 void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2262
2263 // Unsigned absolute difference.
2264 void uabd(const VRegister& vd,
2265 const VRegister& vn,
2266 const VRegister& vm);
2267
2268 // Signed absolute difference.
2269 void sabd(const VRegister& vd,
2270 const VRegister& vn,
2271 const VRegister& vm);
2272
2273 // Unsigned absolute difference and accumulate.
2274 void uaba(const VRegister& vd,
2275 const VRegister& vn,
2276 const VRegister& vm);
2277
2278 // Signed absolute difference and accumulate.
2279 void saba(const VRegister& vd,
2280 const VRegister& vn,
2281 const VRegister& vm);
2282
2283 // Add.
2284 void add(const VRegister& vd,
2285 const VRegister& vn,
2286 const VRegister& vm);
2287
2288 // Subtract.
2289 void sub(const VRegister& vd,
2290 const VRegister& vn,
2291 const VRegister& vm);
2292
2293 // Unsigned halving add.
2294 void uhadd(const VRegister& vd,
2295 const VRegister& vn,
2296 const VRegister& vm);
2297
2298 // Signed halving add.
2299 void shadd(const VRegister& vd,
2300 const VRegister& vn,
2301 const VRegister& vm);
2302
2303 // Unsigned rounding halving add.
2304 void urhadd(const VRegister& vd,
2305 const VRegister& vn,
2306 const VRegister& vm);
2307
2308 // Signed rounding halving add.
2309 void srhadd(const VRegister& vd,
2310 const VRegister& vn,
2311 const VRegister& vm);
2312
2313 // Unsigned halving sub.
2314 void uhsub(const VRegister& vd,
2315 const VRegister& vn,
2316 const VRegister& vm);
2317
2318 // Signed halving sub.
2319 void shsub(const VRegister& vd,
2320 const VRegister& vn,
2321 const VRegister& vm);
2322
2323 // Unsigned saturating add.
2324 void uqadd(const VRegister& vd,
2325 const VRegister& vn,
2326 const VRegister& vm);
2327
2328 // Signed saturating add.
2329 void sqadd(const VRegister& vd,
2330 const VRegister& vn,
2331 const VRegister& vm);
2332
2333 // Unsigned saturating subtract.
2334 void uqsub(const VRegister& vd,
2335 const VRegister& vn,
2336 const VRegister& vm);
2337
2338 // Signed saturating subtract.
2339 void sqsub(const VRegister& vd,
2340 const VRegister& vn,
2341 const VRegister& vm);
2342
2343 // Add pairwise.
2344 void addp(const VRegister& vd,
2345 const VRegister& vn,
2346 const VRegister& vm);
2347
2348 // Add pair of elements scalar.
2349 void addp(const VRegister& vd,
2350 const VRegister& vn);
2351
2352 // Multiply-add to accumulator.
2353 void mla(const VRegister& vd,
2354 const VRegister& vn,
2355 const VRegister& vm);
2356
2357 // Multiply-subtract to accumulator.
2358 void mls(const VRegister& vd,
2359 const VRegister& vn,
2360 const VRegister& vm);
2361
2362 // Multiply.
2363 void mul(const VRegister& vd,
2364 const VRegister& vn,
2365 const VRegister& vm);
2366
2367 // Multiply by scalar element.
2368 void mul(const VRegister& vd,
2369 const VRegister& vn,
2370 const VRegister& vm,
2371 int vm_index);
2372
2373 // Multiply-add by scalar element.
2374 void mla(const VRegister& vd,
2375 const VRegister& vn,
2376 const VRegister& vm,
2377 int vm_index);
2378
2379 // Multiply-subtract by scalar element.
2380 void mls(const VRegister& vd,
2381 const VRegister& vn,
2382 const VRegister& vm,
2383 int vm_index);
2384
2385 // Signed long multiply-add by scalar element.
2386 void smlal(const VRegister& vd,
2387 const VRegister& vn,
2388 const VRegister& vm,
2389 int vm_index);
2390
2391 // Signed long multiply-add by scalar element (second part).
2392 void smlal2(const VRegister& vd,
2393 const VRegister& vn,
2394 const VRegister& vm,
2395 int vm_index);
2396
2397 // Unsigned long multiply-add by scalar element.
2398 void umlal(const VRegister& vd,
2399 const VRegister& vn,
2400 const VRegister& vm,
2401 int vm_index);
2402
2403 // Unsigned long multiply-add by scalar element (second part).
2404 void umlal2(const VRegister& vd,
2405 const VRegister& vn,
2406 const VRegister& vm,
2407 int vm_index);
2408
2409 // Signed long multiply-sub by scalar element.
2410 void smlsl(const VRegister& vd,
2411 const VRegister& vn,
2412 const VRegister& vm,
2413 int vm_index);
2414
2415 // Signed long multiply-sub by scalar element (second part).
2416 void smlsl2(const VRegister& vd,
2417 const VRegister& vn,
2418 const VRegister& vm,
2419 int vm_index);
2420
2421 // Unsigned long multiply-sub by scalar element.
2422 void umlsl(const VRegister& vd,
2423 const VRegister& vn,
2424 const VRegister& vm,
2425 int vm_index);
2426
2427 // Unsigned long multiply-sub by scalar element (second part).
2428 void umlsl2(const VRegister& vd,
2429 const VRegister& vn,
2430 const VRegister& vm,
2431 int vm_index);
2432
2433 // Signed long multiply by scalar element.
2434 void smull(const VRegister& vd,
2435 const VRegister& vn,
2436 const VRegister& vm,
2437 int vm_index);
2438
2439 // Signed long multiply by scalar element (second part).
2440 void smull2(const VRegister& vd,
2441 const VRegister& vn,
2442 const VRegister& vm,
2443 int vm_index);
2444
2445 // Unsigned long multiply by scalar element.
2446 void umull(const VRegister& vd,
2447 const VRegister& vn,
2448 const VRegister& vm,
2449 int vm_index);
2450
2451 // Unsigned long multiply by scalar element (second part).
2452 void umull2(const VRegister& vd,
2453 const VRegister& vn,
2454 const VRegister& vm,
2455 int vm_index);
2456
2457 // Signed saturating doubling long multiply by element.
2458 void sqdmull(const VRegister& vd,
2459 const VRegister& vn,
2460 const VRegister& vm,
2461 int vm_index);
2462
2463 // Signed saturating doubling long multiply by element (second part).
2464 void sqdmull2(const VRegister& vd,
2465 const VRegister& vn,
2466 const VRegister& vm,
2467 int vm_index);
2468
2469 // Signed saturating doubling long multiply-add by element.
2470 void sqdmlal(const VRegister& vd,
2471 const VRegister& vn,
2472 const VRegister& vm,
2473 int vm_index);
2474
2475 // Signed saturating doubling long multiply-add by element (second part).
2476 void sqdmlal2(const VRegister& vd,
2477 const VRegister& vn,
2478 const VRegister& vm,
2479 int vm_index);
2480
2481 // Signed saturating doubling long multiply-sub by element.
2482 void sqdmlsl(const VRegister& vd,
2483 const VRegister& vn,
2484 const VRegister& vm,
2485 int vm_index);
2486
2487 // Signed saturating doubling long multiply-sub by element (second part).
2488 void sqdmlsl2(const VRegister& vd,
2489 const VRegister& vn,
2490 const VRegister& vm,
2491 int vm_index);
2492
2493 // Compare equal.
2494 void cmeq(const VRegister& vd,
2495 const VRegister& vn,
2496 const VRegister& vm);
2497
2498 // Compare signed greater than or equal.
2499 void cmge(const VRegister& vd,
2500 const VRegister& vn,
2501 const VRegister& vm);
2502
2503 // Compare signed greater than.
2504 void cmgt(const VRegister& vd,
2505 const VRegister& vn,
2506 const VRegister& vm);
2507
2508 // Compare unsigned higher.
2509 void cmhi(const VRegister& vd,
2510 const VRegister& vn,
2511 const VRegister& vm);
2512
2513 // Compare unsigned higher or same.
2514 void cmhs(const VRegister& vd,
2515 const VRegister& vn,
2516 const VRegister& vm);
2517
2518 // Compare bitwise test bits nonzero.
2519 void cmtst(const VRegister& vd,
2520 const VRegister& vn,
2521 const VRegister& vm);
2522
2523 // Compare bitwise equal to zero.
2524 void cmeq(const VRegister& vd,
2525 const VRegister& vn,
2526 int value);
2527
2528 // Compare signed greater than or equal to zero.
2529 void cmge(const VRegister& vd,
2530 const VRegister& vn,
2531 int value);
2532
2533 // Compare signed greater than zero.
2534 void cmgt(const VRegister& vd,
2535 const VRegister& vn,
2536 int value);
2537
2538 // Compare signed less than or equal to zero.
2539 void cmle(const VRegister& vd,
2540 const VRegister& vn,
2541 int value);
2542
2543 // Compare signed less than zero.
2544 void cmlt(const VRegister& vd,
2545 const VRegister& vn,
2546 int value);
2547
2548 // Signed shift left by register.
2549 void sshl(const VRegister& vd,
2550 const VRegister& vn,
2551 const VRegister& vm);
2552
2553 // Unsigned shift left by register.
2554 void ushl(const VRegister& vd,
2555 const VRegister& vn,
2556 const VRegister& vm);
2557
2558 // Signed saturating shift left by register.
2559 void sqshl(const VRegister& vd,
2560 const VRegister& vn,
2561 const VRegister& vm);
2562
2563 // Unsigned saturating shift left by register.
2564 void uqshl(const VRegister& vd,
2565 const VRegister& vn,
2566 const VRegister& vm);
2567
2568 // Signed rounding shift left by register.
2569 void srshl(const VRegister& vd,
2570 const VRegister& vn,
2571 const VRegister& vm);
2572
2573 // Unsigned rounding shift left by register.
2574 void urshl(const VRegister& vd,
2575 const VRegister& vn,
2576 const VRegister& vm);
2577
2578 // Signed saturating rounding shift left by register.
2579 void sqrshl(const VRegister& vd,
2580 const VRegister& vn,
2581 const VRegister& vm);
2582
2583 // Unsigned saturating rounding shift left by register.
2584 void uqrshl(const VRegister& vd,
2585 const VRegister& vn,
2586 const VRegister& vm);
2587
2588 // Bitwise and.
2589 void and_(const VRegister& vd,
2590 const VRegister& vn,
2591 const VRegister& vm);
2592
2593 // Bitwise or.
2594 void orr(const VRegister& vd,
2595 const VRegister& vn,
2596 const VRegister& vm);
2597
2598 // Bitwise or immediate.
2599 void orr(const VRegister& vd,
2600 const int imm8,
2601 const int left_shift = 0);
2602
2603 // Move register to register.
2604 void mov(const VRegister& vd,
2605 const VRegister& vn);
2606
2607 // Bitwise orn.
2608 void orn(const VRegister& vd,
2609 const VRegister& vn,
2610 const VRegister& vm);
2611
2612 // Bitwise eor.
2613 void eor(const VRegister& vd,
2614 const VRegister& vn,
2615 const VRegister& vm);
2616
2617 // Bit clear immediate.
2618 void bic(const VRegister& vd,
2619 const int imm8,
2620 const int left_shift = 0);
2621
2622 // Bit clear.
2623 void bic(const VRegister& vd,
2624 const VRegister& vn,
2625 const VRegister& vm);
2626
2627 // Bitwise insert if false.
2628 void bif(const VRegister& vd,
2629 const VRegister& vn,
2630 const VRegister& vm);
2631
2632 // Bitwise insert if true.
2633 void bit(const VRegister& vd,
2634 const VRegister& vn,
2635 const VRegister& vm);
2636
2637 // Bitwise select.
2638 void bsl(const VRegister& vd,
2639 const VRegister& vn,
2640 const VRegister& vm);
2641
2642 // Polynomial multiply.
2643 void pmul(const VRegister& vd,
2644 const VRegister& vn,
2645 const VRegister& vm);
2646
2647 // Vector move immediate.
2648 void movi(const VRegister& vd,
2649 const uint64_t imm,
2650 Shift shift = LSL,
2651 const int shift_amount = 0);
2652
2653 // Bitwise not.
2654 void mvn(const VRegister& vd,
2655 const VRegister& vn);
2656
2657 // Vector move inverted immediate.
2658 void mvni(const VRegister& vd,
2659 const int imm8,
2660 Shift shift = LSL,
2661 const int shift_amount = 0);
2662
2663 // Signed saturating accumulate of unsigned value.
2664 void suqadd(const VRegister& vd,
2665 const VRegister& vn);
2666
2667 // Unsigned saturating accumulate of signed value.
2668 void usqadd(const VRegister& vd,
2669 const VRegister& vn);
2670
2671 // Absolute value.
2672 void abs(const VRegister& vd,
2673 const VRegister& vn);
2674
2675 // Signed saturating absolute value.
2676 void sqabs(const VRegister& vd,
2677 const VRegister& vn);
2678
2679 // Negate.
2680 void neg(const VRegister& vd,
2681 const VRegister& vn);
2682
2683 // Signed saturating negate.
2684 void sqneg(const VRegister& vd,
2685 const VRegister& vn);
2686
2687 // Bitwise not.
2688 void not_(const VRegister& vd,
2689 const VRegister& vn);
2690
2691 // Extract narrow.
2692 void xtn(const VRegister& vd,
2693 const VRegister& vn);
2694
2695 // Extract narrow (second part).
2696 void xtn2(const VRegister& vd,
2697 const VRegister& vn);
2698
2699 // Signed saturating extract narrow.
2700 void sqxtn(const VRegister& vd,
2701 const VRegister& vn);
2702
2703 // Signed saturating extract narrow (second part).
2704 void sqxtn2(const VRegister& vd,
2705 const VRegister& vn);
2706
2707 // Unsigned saturating extract narrow.
2708 void uqxtn(const VRegister& vd,
2709 const VRegister& vn);
2710
2711 // Unsigned saturating extract narrow (second part).
2712 void uqxtn2(const VRegister& vd,
2713 const VRegister& vn);
2714
2715 // Signed saturating extract unsigned narrow.
2716 void sqxtun(const VRegister& vd,
2717 const VRegister& vn);
2718
2719 // Signed saturating extract unsigned narrow (second part).
2720 void sqxtun2(const VRegister& vd,
2721 const VRegister& vn);
2722
2723 // Extract vector from pair of vectors.
2724 void ext(const VRegister& vd,
2725 const VRegister& vn,
2726 const VRegister& vm,
2727 int index);
2728
2729 // Duplicate vector element to vector or scalar.
2730 void dup(const VRegister& vd,
2731 const VRegister& vn,
2732 int vn_index);
2733
2734 // Move vector element to scalar.
2735 void mov(const VRegister& vd,
2736 const VRegister& vn,
2737 int vn_index);
2738
2739 // Duplicate general-purpose register to vector.
2740 void dup(const VRegister& vd,
2741 const Register& rn);
2742
2743 // Insert vector element from another vector element.
2744 void ins(const VRegister& vd,
2745 int vd_index,
2746 const VRegister& vn,
2747 int vn_index);
2748
2749 // Move vector element to another vector element.
2750 void mov(const VRegister& vd,
2751 int vd_index,
2752 const VRegister& vn,
2753 int vn_index);
2754
2755 // Insert vector element from general-purpose register.
2756 void ins(const VRegister& vd,
2757 int vd_index,
2758 const Register& rn);
2759
2760 // Move general-purpose register to a vector element.
2761 void mov(const VRegister& vd,
2762 int vd_index,
2763 const Register& rn);
2764
2765 // Unsigned move vector element to general-purpose register.
2766 void umov(const Register& rd,
2767 const VRegister& vn,
2768 int vn_index);
2769
2770 // Move vector element to general-purpose register.
2771 void mov(const Register& rd,
2772 const VRegister& vn,
2773 int vn_index);
2774
2775 // Signed move vector element to general-purpose register.
2776 void smov(const Register& rd,
2777 const VRegister& vn,
2778 int vn_index);
2779
2780 // One-element structure load to one register.
2781 void ld1(const VRegister& vt,
2782 const MemOperand& src);
2783
2784 // One-element structure load to two registers.
2785 void ld1(const VRegister& vt,
2786 const VRegister& vt2,
2787 const MemOperand& src);
2788
2789 // One-element structure load to three registers.
2790 void ld1(const VRegister& vt,
2791 const VRegister& vt2,
2792 const VRegister& vt3,
2793 const MemOperand& src);
2794
2795 // One-element structure load to four registers.
2796 void ld1(const VRegister& vt,
2797 const VRegister& vt2,
2798 const VRegister& vt3,
2799 const VRegister& vt4,
2800 const MemOperand& src);
2801
2802 // One-element single structure load to one lane.
2803 void ld1(const VRegister& vt,
2804 int lane,
2805 const MemOperand& src);
2806
2807 // One-element single structure load to all lanes.
2808 void ld1r(const VRegister& vt,
2809 const MemOperand& src);
2810
2811 // Two-element structure load.
2812 void ld2(const VRegister& vt,
2813 const VRegister& vt2,
2814 const MemOperand& src);
2815
2816 // Two-element single structure load to one lane.
2817 void ld2(const VRegister& vt,
2818 const VRegister& vt2,
2819 int lane,
2820 const MemOperand& src);
2821
2822 // Two-element single structure load to all lanes.
2823 void ld2r(const VRegister& vt,
2824 const VRegister& vt2,
2825 const MemOperand& src);
2826
2827 // Three-element structure load.
2828 void ld3(const VRegister& vt,
2829 const VRegister& vt2,
2830 const VRegister& vt3,
2831 const MemOperand& src);
2832
2833 // Three-element single structure load to one lane.
2834 void ld3(const VRegister& vt,
2835 const VRegister& vt2,
2836 const VRegister& vt3,
2837 int lane,
2838 const MemOperand& src);
2839
2840 // Three-element single structure load to all lanes.
2841 void ld3r(const VRegister& vt,
2842 const VRegister& vt2,
2843 const VRegister& vt3,
2844 const MemOperand& src);
2845
2846 // Four-element structure load.
2847 void ld4(const VRegister& vt,
2848 const VRegister& vt2,
2849 const VRegister& vt3,
2850 const VRegister& vt4,
2851 const MemOperand& src);
2852
2853 // Four-element single structure load to one lane.
2854 void ld4(const VRegister& vt,
2855 const VRegister& vt2,
2856 const VRegister& vt3,
2857 const VRegister& vt4,
2858 int lane,
2859 const MemOperand& src);
2860
2861 // Four-element single structure load to all lanes.
2862 void ld4r(const VRegister& vt,
2863 const VRegister& vt2,
2864 const VRegister& vt3,
2865 const VRegister& vt4,
2866 const MemOperand& src);
2867
2868 // Count leading sign bits.
2869 void cls(const VRegister& vd,
2870 const VRegister& vn);
2871
2872 // Count leading zero bits (vector).
2873 void clz(const VRegister& vd,
2874 const VRegister& vn);
2875
2876 // Population count per byte.
2877 void cnt(const VRegister& vd,
2878 const VRegister& vn);
2879
2880 // Reverse bit order.
2881 void rbit(const VRegister& vd,
2882 const VRegister& vn);
2883
2884 // Reverse elements in 16-bit halfwords.
2885 void rev16(const VRegister& vd,
2886 const VRegister& vn);
2887
2888 // Reverse elements in 32-bit words.
2889 void rev32(const VRegister& vd,
2890 const VRegister& vn);
2891
2892 // Reverse elements in 64-bit doublewords.
2893 void rev64(const VRegister& vd,
2894 const VRegister& vn);
2895
2896 // Unsigned reciprocal square root estimate.
2897 void ursqrte(const VRegister& vd,
2898 const VRegister& vn);
2899
2900 // Unsigned reciprocal estimate.
2901 void urecpe(const VRegister& vd,
2902 const VRegister& vn);
2903
2904 // Signed pairwise long add.
2905 void saddlp(const VRegister& vd,
2906 const VRegister& vn);
2907
2908 // Unsigned pairwise long add.
2909 void uaddlp(const VRegister& vd,
2910 const VRegister& vn);
2911
2912 // Signed pairwise long add and accumulate.
2913 void sadalp(const VRegister& vd,
2914 const VRegister& vn);
2915
2916 // Unsigned pairwise long add and accumulate.
2917 void uadalp(const VRegister& vd,
2918 const VRegister& vn);
2919
2920 // Shift left by immediate.
2921 void shl(const VRegister& vd,
2922 const VRegister& vn,
2923 int shift);
2924
2925 // Signed saturating shift left by immediate.
2926 void sqshl(const VRegister& vd,
2927 const VRegister& vn,
2928 int shift);
2929
2930 // Signed saturating shift left unsigned by immediate.
2931 void sqshlu(const VRegister& vd,
2932 const VRegister& vn,
2933 int shift);
2934
2935 // Unsigned saturating shift left by immediate.
2936 void uqshl(const VRegister& vd,
2937 const VRegister& vn,
2938 int shift);
2939
2940 // Signed shift left long by immediate.
2941 void sshll(const VRegister& vd,
2942 const VRegister& vn,
2943 int shift);
2944
2945 // Signed shift left long by immediate (second part).
2946 void sshll2(const VRegister& vd,
2947 const VRegister& vn,
2948 int shift);
2949
2950 // Signed extend long.
2951 void sxtl(const VRegister& vd,
2952 const VRegister& vn);
2953
2954 // Signed extend long (second part).
2955 void sxtl2(const VRegister& vd,
2956 const VRegister& vn);
2957
2958 // Unsigned shift left long by immediate.
2959 void ushll(const VRegister& vd,
2960 const VRegister& vn,
2961 int shift);
2962
2963 // Unsigned shift left long by immediate (second part).
2964 void ushll2(const VRegister& vd,
2965 const VRegister& vn,
2966 int shift);
2967
2968 // Shift left long by element size.
2969 void shll(const VRegister& vd,
2970 const VRegister& vn,
2971 int shift);
2972
2973 // Shift left long by element size (second part).
2974 void shll2(const VRegister& vd,
2975 const VRegister& vn,
2976 int shift);
2977
2978 // Unsigned extend long.
2979 void uxtl(const VRegister& vd,
2980 const VRegister& vn);
2981
2982 // Unsigned extend long (second part).
2983 void uxtl2(const VRegister& vd,
2984 const VRegister& vn);
2985
2986 // Shift left by immediate and insert.
2987 void sli(const VRegister& vd,
2988 const VRegister& vn,
2989 int shift);
2990
2991 // Shift right by immediate and insert.
2992 void sri(const VRegister& vd,
2993 const VRegister& vn,
2994 int shift);
2995
2996 // Signed maximum.
2997 void smax(const VRegister& vd,
2998 const VRegister& vn,
2999 const VRegister& vm);
3000
3001 // Signed pairwise maximum.
3002 void smaxp(const VRegister& vd,
3003 const VRegister& vn,
3004 const VRegister& vm);
3005
3006 // Add across vector.
3007 void addv(const VRegister& vd,
3008 const VRegister& vn);
3009
3010 // Signed add long across vector.
3011 void saddlv(const VRegister& vd,
3012 const VRegister& vn);
3013
3014 // Unsigned add long across vector.
3015 void uaddlv(const VRegister& vd,
3016 const VRegister& vn);
3017
3018 // FP maximum number across vector.
3019 void fmaxnmv(const VRegister& vd,
3020 const VRegister& vn);
3021
3022 // FP maximum across vector.
3023 void fmaxv(const VRegister& vd,
3024 const VRegister& vn);
3025
3026 // FP minimum number across vector.
3027 void fminnmv(const VRegister& vd,
3028 const VRegister& vn);
3029
3030 // FP minimum across vector.
3031 void fminv(const VRegister& vd,
3032 const VRegister& vn);
3033
3034 // Signed maximum across vector.
3035 void smaxv(const VRegister& vd,
3036 const VRegister& vn);
3037
3038 // Signed minimum.
3039 void smin(const VRegister& vd,
3040 const VRegister& vn,
3041 const VRegister& vm);
3042
3043 // Signed minimum pairwise.
3044 void sminp(const VRegister& vd,
3045 const VRegister& vn,
3046 const VRegister& vm);
3047
3048 // Signed minimum across vector.
3049 void sminv(const VRegister& vd,
3050 const VRegister& vn);
3051
3052 // One-element structure store from one register.
3053 void st1(const VRegister& vt,
3054 const MemOperand& src);
3055
3056 // One-element structure store from two registers.
3057 void st1(const VRegister& vt,
3058 const VRegister& vt2,
3059 const MemOperand& src);
3060
3061 // One-element structure store from three registers.
3062 void st1(const VRegister& vt,
3063 const VRegister& vt2,
3064 const VRegister& vt3,
3065 const MemOperand& src);
3066
3067 // One-element structure store from four registers.
3068 void st1(const VRegister& vt,
3069 const VRegister& vt2,
3070 const VRegister& vt3,
3071 const VRegister& vt4,
3072 const MemOperand& src);
3073
3074 // One-element single structure store from one lane.
3075 void st1(const VRegister& vt,
3076 int lane,
3077 const MemOperand& src);
3078
3079 // Two-element structure store from two registers.
3080 void st2(const VRegister& vt,
3081 const VRegister& vt2,
3082 const MemOperand& src);
3083
3084 // Two-element single structure store from two lanes.
3085 void st2(const VRegister& vt,
3086 const VRegister& vt2,
3087 int lane,
3088 const MemOperand& src);
3089
3090 // Three-element structure store from three registers.
3091 void st3(const VRegister& vt,
3092 const VRegister& vt2,
3093 const VRegister& vt3,
3094 const MemOperand& src);
3095
3096 // Three-element single structure store from three lanes.
3097 void st3(const VRegister& vt,
3098 const VRegister& vt2,
3099 const VRegister& vt3,
3100 int lane,
3101 const MemOperand& src);
3102
3103 // Four-element structure store from four registers.
3104 void st4(const VRegister& vt,
3105 const VRegister& vt2,
3106 const VRegister& vt3,
3107 const VRegister& vt4,
3108 const MemOperand& src);
3109
3110 // Four-element single structure store from four lanes.
3111 void st4(const VRegister& vt,
3112 const VRegister& vt2,
3113 const VRegister& vt3,
3114 const VRegister& vt4,
3115 int lane,
3116 const MemOperand& src);
3117
3118 // Unsigned add long.
3119 void uaddl(const VRegister& vd,
3120 const VRegister& vn,
3121 const VRegister& vm);
3122
3123 // Unsigned add long (second part).
3124 void uaddl2(const VRegister& vd,
3125 const VRegister& vn,
3126 const VRegister& vm);
3127
3128 // Unsigned add wide.
3129 void uaddw(const VRegister& vd,
3130 const VRegister& vn,
3131 const VRegister& vm);
3132
3133 // Unsigned add wide (second part).
3134 void uaddw2(const VRegister& vd,
3135 const VRegister& vn,
3136 const VRegister& vm);
3137
3138 // Signed add long.
3139 void saddl(const VRegister& vd,
3140 const VRegister& vn,
3141 const VRegister& vm);
3142
3143 // Signed add long (second part).
3144 void saddl2(const VRegister& vd,
3145 const VRegister& vn,
3146 const VRegister& vm);
3147
3148 // Signed add wide.
3149 void saddw(const VRegister& vd,
3150 const VRegister& vn,
3151 const VRegister& vm);
3152
3153 // Signed add wide (second part).
3154 void saddw2(const VRegister& vd,
3155 const VRegister& vn,
3156 const VRegister& vm);
3157
3158 // Unsigned subtract long.
3159 void usubl(const VRegister& vd,
3160 const VRegister& vn,
3161 const VRegister& vm);
3162
3163 // Unsigned subtract long (second part).
3164 void usubl2(const VRegister& vd,
3165 const VRegister& vn,
3166 const VRegister& vm);
3167
3168 // Unsigned subtract wide.
3169 void usubw(const VRegister& vd,
3170 const VRegister& vn,
3171 const VRegister& vm);
3172
3173 // Unsigned subtract wide (second part).
3174 void usubw2(const VRegister& vd,
3175 const VRegister& vn,
3176 const VRegister& vm);
3177
3178 // Signed subtract long.
3179 void ssubl(const VRegister& vd,
3180 const VRegister& vn,
3181 const VRegister& vm);
3182
3183 // Signed subtract long (second part).
3184 void ssubl2(const VRegister& vd,
3185 const VRegister& vn,
3186 const VRegister& vm);
3187
3188 // Signed integer subtract wide.
3189 void ssubw(const VRegister& vd,
3190 const VRegister& vn,
3191 const VRegister& vm);
3192
3193 // Signed integer subtract wide (second part).
3194 void ssubw2(const VRegister& vd,
3195 const VRegister& vn,
3196 const VRegister& vm);
3197
3198 // Unsigned maximum.
3199 void umax(const VRegister& vd,
3200 const VRegister& vn,
3201 const VRegister& vm);
3202
3203 // Unsigned pairwise maximum.
3204 void umaxp(const VRegister& vd,
3205 const VRegister& vn,
3206 const VRegister& vm);
3207
3208 // Unsigned maximum across vector.
3209 void umaxv(const VRegister& vd,
3210 const VRegister& vn);
3211
3212 // Unsigned minimum.
3213 void umin(const VRegister& vd,
3214 const VRegister& vn,
3215 const VRegister& vm);
3216
3217 // Unsigned pairwise minimum.
3218 void uminp(const VRegister& vd,
3219 const VRegister& vn,
3220 const VRegister& vm);
3221
3222 // Unsigned minimum across vector.
3223 void uminv(const VRegister& vd,
3224 const VRegister& vn);
3225
3226 // Transpose vectors (primary).
3227 void trn1(const VRegister& vd,
3228 const VRegister& vn,
3229 const VRegister& vm);
3230
3231 // Transpose vectors (secondary).
3232 void trn2(const VRegister& vd,
3233 const VRegister& vn,
3234 const VRegister& vm);
3235
3236 // Unzip vectors (primary).
3237 void uzp1(const VRegister& vd,
3238 const VRegister& vn,
3239 const VRegister& vm);
3240
3241 // Unzip vectors (secondary).
3242 void uzp2(const VRegister& vd,
3243 const VRegister& vn,
3244 const VRegister& vm);
3245
3246 // Zip vectors (primary).
3247 void zip1(const VRegister& vd,
3248 const VRegister& vn,
3249 const VRegister& vm);
3250
3251 // Zip vectors (secondary).
3252 void zip2(const VRegister& vd,
3253 const VRegister& vn,
3254 const VRegister& vm);
3255
3256 // Signed shift right by immediate.
3257 void sshr(const VRegister& vd,
3258 const VRegister& vn,
3259 int shift);
3260
3261 // Unsigned shift right by immediate.
3262 void ushr(const VRegister& vd,
3263 const VRegister& vn,
3264 int shift);
3265
3266 // Signed rounding shift right by immediate.
3267 void srshr(const VRegister& vd,
3268 const VRegister& vn,
3269 int shift);
3270
3271 // Unsigned rounding shift right by immediate.
3272 void urshr(const VRegister& vd,
3273 const VRegister& vn,
3274 int shift);
3275
3276 // Signed shift right by immediate and accumulate.
3277 void ssra(const VRegister& vd,
3278 const VRegister& vn,
3279 int shift);
3280
3281 // Unsigned shift right by immediate and accumulate.
3282 void usra(const VRegister& vd,
3283 const VRegister& vn,
3284 int shift);
3285
3286 // Signed rounding shift right by immediate and accumulate.
3287 void srsra(const VRegister& vd,
3288 const VRegister& vn,
3289 int shift);
3290
3291 // Unsigned rounding shift right by immediate and accumulate.
3292 void ursra(const VRegister& vd,
3293 const VRegister& vn,
3294 int shift);
3295
3296 // Shift right narrow by immediate.
3297 void shrn(const VRegister& vd,
3298 const VRegister& vn,
3299 int shift);
3300
3301 // Shift right narrow by immediate (second part).
3302 void shrn2(const VRegister& vd,
3303 const VRegister& vn,
3304 int shift);
3305
3306 // Rounding shift right narrow by immediate.
3307 void rshrn(const VRegister& vd,
3308 const VRegister& vn,
3309 int shift);
3310
3311 // Rounding shift right narrow by immediate (second part).
3312 void rshrn2(const VRegister& vd,
3313 const VRegister& vn,
3314 int shift);
3315
3316 // Unsigned saturating shift right narrow by immediate.
3317 void uqshrn(const VRegister& vd,
3318 const VRegister& vn,
3319 int shift);
3320
3321 // Unsigned saturating shift right narrow by immediate (second part).
3322 void uqshrn2(const VRegister& vd,
3323 const VRegister& vn,
3324 int shift);
3325
3326 // Unsigned saturating rounding shift right narrow by immediate.
3327 void uqrshrn(const VRegister& vd,
3328 const VRegister& vn,
3329 int shift);
3330
3331 // Unsigned saturating rounding shift right narrow by immediate (second part).
3332 void uqrshrn2(const VRegister& vd,
3333 const VRegister& vn,
3334 int shift);
3335
3336 // Signed saturating shift right narrow by immediate.
3337 void sqshrn(const VRegister& vd,
3338 const VRegister& vn,
3339 int shift);
3340
3341 // Signed saturating shift right narrow by immediate (second part).
3342 void sqshrn2(const VRegister& vd,
3343 const VRegister& vn,
3344 int shift);
3345
3346 // Signed saturating rounded shift right narrow by immediate.
3347 void sqrshrn(const VRegister& vd,
3348 const VRegister& vn,
3349 int shift);
3350
3351 // Signed saturating rounded shift right narrow by immediate (second part).
3352 void sqrshrn2(const VRegister& vd,
3353 const VRegister& vn,
3354 int shift);
3355
3356 // Signed saturating shift right unsigned narrow by immediate.
3357 void sqshrun(const VRegister& vd,
3358 const VRegister& vn,
3359 int shift);
3360
3361 // Signed saturating shift right unsigned narrow by immediate (second part).
3362 void sqshrun2(const VRegister& vd,
3363 const VRegister& vn,
3364 int shift);
3365
3366 // Signed sat rounded shift right unsigned narrow by immediate.
3367 void sqrshrun(const VRegister& vd,
3368 const VRegister& vn,
3369 int shift);
3370
3371 // Signed sat rounded shift right unsigned narrow by immediate (second part).
3372 void sqrshrun2(const VRegister& vd,
3373 const VRegister& vn,
3374 int shift);
3375
3376 // FP reciprocal step.
3377 void frecps(const VRegister& vd,
3378 const VRegister& vn,
3379 const VRegister& vm);
3380
3381 // FP reciprocal estimate.
3382 void frecpe(const VRegister& vd,
3383 const VRegister& vn);
3384
3385 // FP reciprocal square root estimate.
3386 void frsqrte(const VRegister& vd,
3387 const VRegister& vn);
3388
3389 // FP reciprocal square root step.
3390 void frsqrts(const VRegister& vd,
3391 const VRegister& vn,
3392 const VRegister& vm);
3393
3394 // Signed absolute difference and accumulate long.
3395 void sabal(const VRegister& vd,
3396 const VRegister& vn,
3397 const VRegister& vm);
3398
3399 // Signed absolute difference and accumulate long (second part).
3400 void sabal2(const VRegister& vd,
3401 const VRegister& vn,
3402 const VRegister& vm);
3403
3404 // Unsigned absolute difference and accumulate long.
3405 void uabal(const VRegister& vd,
3406 const VRegister& vn,
3407 const VRegister& vm);
3408
3409 // Unsigned absolute difference and accumulate long (second part).
3410 void uabal2(const VRegister& vd,
3411 const VRegister& vn,
3412 const VRegister& vm);
3413
3414 // Signed absolute difference long.
3415 void sabdl(const VRegister& vd,
3416 const VRegister& vn,
3417 const VRegister& vm);
3418
3419 // Signed absolute difference long (second part).
3420 void sabdl2(const VRegister& vd,
3421 const VRegister& vn,
3422 const VRegister& vm);
3423
3424 // Unsigned absolute difference long.
3425 void uabdl(const VRegister& vd,
3426 const VRegister& vn,
3427 const VRegister& vm);
3428
3429 // Unsigned absolute difference long (second part).
3430 void uabdl2(const VRegister& vd,
3431 const VRegister& vn,
3432 const VRegister& vm);
3433
3434 // Polynomial multiply long.
3435 void pmull(const VRegister& vd,
3436 const VRegister& vn,
3437 const VRegister& vm);
3438
3439 // Polynomial multiply long (second part).
3440 void pmull2(const VRegister& vd,
3441 const VRegister& vn,
3442 const VRegister& vm);
3443
3444 // Signed long multiply-add.
3445 void smlal(const VRegister& vd,
3446 const VRegister& vn,
3447 const VRegister& vm);
3448
3449 // Signed long multiply-add (second part).
3450 void smlal2(const VRegister& vd,
3451 const VRegister& vn,
3452 const VRegister& vm);
3453
3454 // Unsigned long multiply-add.
3455 void umlal(const VRegister& vd,
3456 const VRegister& vn,
3457 const VRegister& vm);
3458
3459 // Unsigned long multiply-add (second part).
3460 void umlal2(const VRegister& vd,
3461 const VRegister& vn,
3462 const VRegister& vm);
3463
3464 // Signed long multiply-sub.
3465 void smlsl(const VRegister& vd,
3466 const VRegister& vn,
3467 const VRegister& vm);
3468
3469 // Signed long multiply-sub (second part).
3470 void smlsl2(const VRegister& vd,
3471 const VRegister& vn,
3472 const VRegister& vm);
3473
3474 // Unsigned long multiply-sub.
3475 void umlsl(const VRegister& vd,
3476 const VRegister& vn,
3477 const VRegister& vm);
3478
3479 // Unsigned long multiply-sub (second part).
3480 void umlsl2(const VRegister& vd,
3481 const VRegister& vn,
3482 const VRegister& vm);
3483
3484 // Signed long multiply.
3485 void smull(const VRegister& vd,
3486 const VRegister& vn,
3487 const VRegister& vm);
3488
3489 // Signed long multiply (second part).
3490 void smull2(const VRegister& vd,
3491 const VRegister& vn,
3492 const VRegister& vm);
3493
3494 // Signed saturating doubling long multiply-add.
3495 void sqdmlal(const VRegister& vd,
3496 const VRegister& vn,
3497 const VRegister& vm);
3498
3499 // Signed saturating doubling long multiply-add (second part).
3500 void sqdmlal2(const VRegister& vd,
3501 const VRegister& vn,
3502 const VRegister& vm);
3503
3504 // Signed saturating doubling long multiply-subtract.
3505 void sqdmlsl(const VRegister& vd,
3506 const VRegister& vn,
3507 const VRegister& vm);
3508
3509 // Signed saturating doubling long multiply-subtract (second part).
3510 void sqdmlsl2(const VRegister& vd,
3511 const VRegister& vn,
3512 const VRegister& vm);
3513
3514 // Signed saturating doubling long multiply.
3515 void sqdmull(const VRegister& vd,
3516 const VRegister& vn,
3517 const VRegister& vm);
3518
3519 // Signed saturating doubling long multiply (second part).
3520 void sqdmull2(const VRegister& vd,
3521 const VRegister& vn,
3522 const VRegister& vm);
3523
3524 // Signed saturating doubling multiply returning high half.
3525 void sqdmulh(const VRegister& vd,
3526 const VRegister& vn,
3527 const VRegister& vm);
3528
3529 // Signed saturating rounding doubling multiply returning high half.
3530 void sqrdmulh(const VRegister& vd,
3531 const VRegister& vn,
3532 const VRegister& vm);
3533
3534 // Signed saturating doubling multiply element returning high half.
3535 void sqdmulh(const VRegister& vd,
3536 const VRegister& vn,
3537 const VRegister& vm,
3538 int vm_index);
3539
3540 // Signed saturating rounding doubling multiply element returning high half.
3541 void sqrdmulh(const VRegister& vd,
3542 const VRegister& vn,
3543 const VRegister& vm,
3544 int vm_index);
3545
3546 // Unsigned long multiply long.
3547 void umull(const VRegister& vd,
3548 const VRegister& vn,
3549 const VRegister& vm);
3550
3551 // Unsigned long multiply (second part).
3552 void umull2(const VRegister& vd,
3553 const VRegister& vn,
3554 const VRegister& vm);
3555
3556 // Add narrow returning high half.
3557 void addhn(const VRegister& vd,
3558 const VRegister& vn,
3559 const VRegister& vm);
3560
3561 // Add narrow returning high half (second part).
3562 void addhn2(const VRegister& vd,
3563 const VRegister& vn,
3564 const VRegister& vm);
3565
3566 // Rounding add narrow returning high half.
3567 void raddhn(const VRegister& vd,
3568 const VRegister& vn,
3569 const VRegister& vm);
3570
3571 // Rounding add narrow returning high half (second part).
3572 void raddhn2(const VRegister& vd,
3573 const VRegister& vn,
3574 const VRegister& vm);
3575
3576 // Subtract narrow returning high half.
3577 void subhn(const VRegister& vd,
3578 const VRegister& vn,
3579 const VRegister& vm);
3580
3581 // Subtract narrow returning high half (second part).
3582 void subhn2(const VRegister& vd,
3583 const VRegister& vn,
3584 const VRegister& vm);
3585
3586 // Rounding subtract narrow returning high half.
3587 void rsubhn(const VRegister& vd,
3588 const VRegister& vn,
3589 const VRegister& vm);
3590
3591 // Rounding subtract narrow returning high half (second part).
3592 void rsubhn2(const VRegister& vd,
3593 const VRegister& vn,
3594 const VRegister& vm);
3595
3596 // FP vector multiply accumulate.
3597 void fmla(const VRegister& vd,
3598 const VRegister& vn,
3599 const VRegister& vm);
3600
3601 // FP vector multiply subtract.
3602 void fmls(const VRegister& vd,
3603 const VRegister& vn,
3604 const VRegister& vm);
3605
3606 // FP vector multiply extended.
3607 void fmulx(const VRegister& vd,
3608 const VRegister& vn,
3609 const VRegister& vm);
3610
3611 // FP absolute greater than or equal.
3612 void facge(const VRegister& vd,
3613 const VRegister& vn,
3614 const VRegister& vm);
3615
3616 // FP absolute greater than.
3617 void facgt(const VRegister& vd,
3618 const VRegister& vn,
3619 const VRegister& vm);
3620
3621 // FP multiply by element.
3622 void fmul(const VRegister& vd,
3623 const VRegister& vn,
3624 const VRegister& vm,
3625 int vm_index);
3626
3627 // FP fused multiply-add to accumulator by element.
3628 void fmla(const VRegister& vd,
3629 const VRegister& vn,
3630 const VRegister& vm,
3631 int vm_index);
3632
3633 // FP fused multiply-sub from accumulator by element.
3634 void fmls(const VRegister& vd,
3635 const VRegister& vn,
3636 const VRegister& vm,
3637 int vm_index);
3638
3639 // FP multiply extended by element.
3640 void fmulx(const VRegister& vd,
3641 const VRegister& vn,
3642 const VRegister& vm,
3643 int vm_index);
3644
3645 // FP compare equal.
3646 void fcmeq(const VRegister& vd,
3647 const VRegister& vn,
3648 const VRegister& vm);
3649
3650 // FP greater than.
3651 void fcmgt(const VRegister& vd,
3652 const VRegister& vn,
3653 const VRegister& vm);
3654
3655 // FP greater than or equal.
3656 void fcmge(const VRegister& vd,
3657 const VRegister& vn,
3658 const VRegister& vm);
3659
3660 // FP compare equal to zero.
3661 void fcmeq(const VRegister& vd,
3662 const VRegister& vn,
3663 double imm);
3664
3665 // FP greater than zero.
3666 void fcmgt(const VRegister& vd,
3667 const VRegister& vn,
3668 double imm);
3669
3670 // FP greater than or equal to zero.
3671 void fcmge(const VRegister& vd,
3672 const VRegister& vn,
3673 double imm);
3674
3675 // FP less than or equal to zero.
3676 void fcmle(const VRegister& vd,
3677 const VRegister& vn,
3678 double imm);
3679
3680 // FP less than to zero.
3681 void fcmlt(const VRegister& vd,
3682 const VRegister& vn,
3683 double imm);
3684
3685 // FP absolute difference.
3686 void fabd(const VRegister& vd,
3687 const VRegister& vn,
3688 const VRegister& vm);
3689
3690 // FP pairwise add vector.
3691 void faddp(const VRegister& vd,
3692 const VRegister& vn,
3693 const VRegister& vm);
3694
3695 // FP pairwise add scalar.
3696 void faddp(const VRegister& vd,
3697 const VRegister& vn);
3698
3699 // FP pairwise maximum vector.
3700 void fmaxp(const VRegister& vd,
3701 const VRegister& vn,
3702 const VRegister& vm);
3703
3704 // FP pairwise maximum scalar.
3705 void fmaxp(const VRegister& vd,
3706 const VRegister& vn);
3707
3708 // FP pairwise minimum vector.
3709 void fminp(const VRegister& vd,
3710 const VRegister& vn,
3711 const VRegister& vm);
3712
3713 // FP pairwise minimum scalar.
3714 void fminp(const VRegister& vd,
3715 const VRegister& vn);
3716
3717 // FP pairwise maximum number vector.
3718 void fmaxnmp(const VRegister& vd,
3719 const VRegister& vn,
3720 const VRegister& vm);
3721
3722 // FP pairwise maximum number scalar.
3723 void fmaxnmp(const VRegister& vd,
3724 const VRegister& vn);
3725
3726 // FP pairwise minimum number vector.
3727 void fminnmp(const VRegister& vd,
3728 const VRegister& vn,
3729 const VRegister& vm);
3730
3731 // FP pairwise minimum number scalar.
3732 void fminnmp(const VRegister& vd,
3733 const VRegister& vn);
3734
3735 // Emit generic instructions.
3736 // Emit raw instructions into the instruction stream.
dci(Instr raw_inst)3737 void dci(Instr raw_inst) { Emit(raw_inst); }
3738
3739 // Emit 32 bits of data into the instruction stream.
dc32(uint32_t data)3740 void dc32(uint32_t data) {
3741 VIXL_ASSERT(buffer_monitor_ > 0);
3742 buffer_->Emit32(data);
3743 }
3744
3745 // Emit 64 bits of data into the instruction stream.
dc64(uint64_t data)3746 void dc64(uint64_t data) {
3747 VIXL_ASSERT(buffer_monitor_ > 0);
3748 buffer_->Emit64(data);
3749 }
3750
3751 // Copy a string into the instruction stream, including the terminating NULL
3752 // character. The instruction pointer is then aligned correctly for
3753 // subsequent instructions.
EmitString(const char * string)3754 void EmitString(const char * string) {
3755 VIXL_ASSERT(string != NULL);
3756 VIXL_ASSERT(buffer_monitor_ > 0);
3757
3758 buffer_->EmitString(string);
3759 buffer_->Align();
3760 }
3761
3762 // Code generation helpers.
3763
3764 // Register encoding.
Rd(CPURegister rd)3765 static Instr Rd(CPURegister rd) {
3766 VIXL_ASSERT(rd.code() != kSPRegInternalCode);
3767 return rd.code() << Rd_offset;
3768 }
3769
Rn(CPURegister rn)3770 static Instr Rn(CPURegister rn) {
3771 VIXL_ASSERT(rn.code() != kSPRegInternalCode);
3772 return rn.code() << Rn_offset;
3773 }
3774
Rm(CPURegister rm)3775 static Instr Rm(CPURegister rm) {
3776 VIXL_ASSERT(rm.code() != kSPRegInternalCode);
3777 return rm.code() << Rm_offset;
3778 }
3779
RmNot31(CPURegister rm)3780 static Instr RmNot31(CPURegister rm) {
3781 VIXL_ASSERT(rm.code() != kSPRegInternalCode);
3782 VIXL_ASSERT(!rm.IsZero());
3783 return Rm(rm);
3784 }
3785
Ra(CPURegister ra)3786 static Instr Ra(CPURegister ra) {
3787 VIXL_ASSERT(ra.code() != kSPRegInternalCode);
3788 return ra.code() << Ra_offset;
3789 }
3790
Rt(CPURegister rt)3791 static Instr Rt(CPURegister rt) {
3792 VIXL_ASSERT(rt.code() != kSPRegInternalCode);
3793 return rt.code() << Rt_offset;
3794 }
3795
Rt2(CPURegister rt2)3796 static Instr Rt2(CPURegister rt2) {
3797 VIXL_ASSERT(rt2.code() != kSPRegInternalCode);
3798 return rt2.code() << Rt2_offset;
3799 }
3800
Rs(CPURegister rs)3801 static Instr Rs(CPURegister rs) {
3802 VIXL_ASSERT(rs.code() != kSPRegInternalCode);
3803 return rs.code() << Rs_offset;
3804 }
3805
3806 // These encoding functions allow the stack pointer to be encoded, and
3807 // disallow the zero register.
RdSP(Register rd)3808 static Instr RdSP(Register rd) {
3809 VIXL_ASSERT(!rd.IsZero());
3810 return (rd.code() & kRegCodeMask) << Rd_offset;
3811 }
3812
RnSP(Register rn)3813 static Instr RnSP(Register rn) {
3814 VIXL_ASSERT(!rn.IsZero());
3815 return (rn.code() & kRegCodeMask) << Rn_offset;
3816 }
3817
3818 // Flags encoding.
Flags(FlagsUpdate S)3819 static Instr Flags(FlagsUpdate S) {
3820 if (S == SetFlags) {
3821 return 1 << FlagsUpdate_offset;
3822 } else if (S == LeaveFlags) {
3823 return 0 << FlagsUpdate_offset;
3824 }
3825 VIXL_UNREACHABLE();
3826 return 0;
3827 }
3828
Cond(Condition cond)3829 static Instr Cond(Condition cond) {
3830 return cond << Condition_offset;
3831 }
3832
3833 // PC-relative address encoding.
ImmPCRelAddress(int imm21)3834 static Instr ImmPCRelAddress(int imm21) {
3835 VIXL_ASSERT(is_int21(imm21));
3836 Instr imm = static_cast<Instr>(truncate_to_int21(imm21));
3837 Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
3838 Instr immlo = imm << ImmPCRelLo_offset;
3839 return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask);
3840 }
3841
3842 // Branch encoding.
ImmUncondBranch(int imm26)3843 static Instr ImmUncondBranch(int imm26) {
3844 VIXL_ASSERT(is_int26(imm26));
3845 return truncate_to_int26(imm26) << ImmUncondBranch_offset;
3846 }
3847
ImmCondBranch(int imm19)3848 static Instr ImmCondBranch(int imm19) {
3849 VIXL_ASSERT(is_int19(imm19));
3850 return truncate_to_int19(imm19) << ImmCondBranch_offset;
3851 }
3852
ImmCmpBranch(int imm19)3853 static Instr ImmCmpBranch(int imm19) {
3854 VIXL_ASSERT(is_int19(imm19));
3855 return truncate_to_int19(imm19) << ImmCmpBranch_offset;
3856 }
3857
ImmTestBranch(int imm14)3858 static Instr ImmTestBranch(int imm14) {
3859 VIXL_ASSERT(is_int14(imm14));
3860 return truncate_to_int14(imm14) << ImmTestBranch_offset;
3861 }
3862
ImmTestBranchBit(unsigned bit_pos)3863 static Instr ImmTestBranchBit(unsigned bit_pos) {
3864 VIXL_ASSERT(is_uint6(bit_pos));
3865 // Subtract five from the shift offset, as we need bit 5 from bit_pos.
3866 unsigned b5 = bit_pos << (ImmTestBranchBit5_offset - 5);
3867 unsigned b40 = bit_pos << ImmTestBranchBit40_offset;
3868 b5 &= ImmTestBranchBit5_mask;
3869 b40 &= ImmTestBranchBit40_mask;
3870 return b5 | b40;
3871 }
3872
3873 // Data Processing encoding.
SF(Register rd)3874 static Instr SF(Register rd) {
3875 return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits;
3876 }
3877
ImmAddSub(int imm)3878 static Instr ImmAddSub(int imm) {
3879 VIXL_ASSERT(IsImmAddSub(imm));
3880 if (is_uint12(imm)) { // No shift required.
3881 imm <<= ImmAddSub_offset;
3882 } else {
3883 imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ShiftAddSub_offset);
3884 }
3885 return imm;
3886 }
3887
ImmS(unsigned imms,unsigned reg_size)3888 static Instr ImmS(unsigned imms, unsigned reg_size) {
3889 VIXL_ASSERT(((reg_size == kXRegSize) && is_uint6(imms)) ||
3890 ((reg_size == kWRegSize) && is_uint5(imms)));
3891 USE(reg_size);
3892 return imms << ImmS_offset;
3893 }
3894
ImmR(unsigned immr,unsigned reg_size)3895 static Instr ImmR(unsigned immr, unsigned reg_size) {
3896 VIXL_ASSERT(((reg_size == kXRegSize) && is_uint6(immr)) ||
3897 ((reg_size == kWRegSize) && is_uint5(immr)));
3898 USE(reg_size);
3899 VIXL_ASSERT(is_uint6(immr));
3900 return immr << ImmR_offset;
3901 }
3902
ImmSetBits(unsigned imms,unsigned reg_size)3903 static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
3904 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
3905 VIXL_ASSERT(is_uint6(imms));
3906 VIXL_ASSERT((reg_size == kXRegSize) || is_uint6(imms + 3));
3907 USE(reg_size);
3908 return imms << ImmSetBits_offset;
3909 }
3910
ImmRotate(unsigned immr,unsigned reg_size)3911 static Instr ImmRotate(unsigned immr, unsigned reg_size) {
3912 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
3913 VIXL_ASSERT(((reg_size == kXRegSize) && is_uint6(immr)) ||
3914 ((reg_size == kWRegSize) && is_uint5(immr)));
3915 USE(reg_size);
3916 return immr << ImmRotate_offset;
3917 }
3918
ImmLLiteral(int imm19)3919 static Instr ImmLLiteral(int imm19) {
3920 VIXL_ASSERT(is_int19(imm19));
3921 return truncate_to_int19(imm19) << ImmLLiteral_offset;
3922 }
3923
BitN(unsigned bitn,unsigned reg_size)3924 static Instr BitN(unsigned bitn, unsigned reg_size) {
3925 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
3926 VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
3927 USE(reg_size);
3928 return bitn << BitN_offset;
3929 }
3930
ShiftDP(Shift shift)3931 static Instr ShiftDP(Shift shift) {
3932 VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
3933 return shift << ShiftDP_offset;
3934 }
3935
ImmDPShift(unsigned amount)3936 static Instr ImmDPShift(unsigned amount) {
3937 VIXL_ASSERT(is_uint6(amount));
3938 return amount << ImmDPShift_offset;
3939 }
3940
ExtendMode(Extend extend)3941 static Instr ExtendMode(Extend extend) {
3942 return extend << ExtendMode_offset;
3943 }
3944
ImmExtendShift(unsigned left_shift)3945 static Instr ImmExtendShift(unsigned left_shift) {
3946 VIXL_ASSERT(left_shift <= 4);
3947 return left_shift << ImmExtendShift_offset;
3948 }
3949
ImmCondCmp(unsigned imm)3950 static Instr ImmCondCmp(unsigned imm) {
3951 VIXL_ASSERT(is_uint5(imm));
3952 return imm << ImmCondCmp_offset;
3953 }
3954
Nzcv(StatusFlags nzcv)3955 static Instr Nzcv(StatusFlags nzcv) {
3956 return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
3957 }
3958
3959 // MemOperand offset encoding.
ImmLSUnsigned(int imm12)3960 static Instr ImmLSUnsigned(int imm12) {
3961 VIXL_ASSERT(is_uint12(imm12));
3962 return imm12 << ImmLSUnsigned_offset;
3963 }
3964
ImmLS(int imm9)3965 static Instr ImmLS(int imm9) {
3966 VIXL_ASSERT(is_int9(imm9));
3967 return truncate_to_int9(imm9) << ImmLS_offset;
3968 }
3969
ImmLSPair(int imm7,unsigned access_size)3970 static Instr ImmLSPair(int imm7, unsigned access_size) {
3971 VIXL_ASSERT(((imm7 >> access_size) << access_size) == imm7);
3972 int scaled_imm7 = imm7 >> access_size;
3973 VIXL_ASSERT(is_int7(scaled_imm7));
3974 return truncate_to_int7(scaled_imm7) << ImmLSPair_offset;
3975 }
3976
ImmShiftLS(unsigned shift_amount)3977 static Instr ImmShiftLS(unsigned shift_amount) {
3978 VIXL_ASSERT(is_uint1(shift_amount));
3979 return shift_amount << ImmShiftLS_offset;
3980 }
3981
ImmPrefetchOperation(int imm5)3982 static Instr ImmPrefetchOperation(int imm5) {
3983 VIXL_ASSERT(is_uint5(imm5));
3984 return imm5 << ImmPrefetchOperation_offset;
3985 }
3986
ImmException(int imm16)3987 static Instr ImmException(int imm16) {
3988 VIXL_ASSERT(is_uint16(imm16));
3989 return imm16 << ImmException_offset;
3990 }
3991
ImmSystemRegister(int imm15)3992 static Instr ImmSystemRegister(int imm15) {
3993 VIXL_ASSERT(is_uint15(imm15));
3994 return imm15 << ImmSystemRegister_offset;
3995 }
3996
ImmHint(int imm7)3997 static Instr ImmHint(int imm7) {
3998 VIXL_ASSERT(is_uint7(imm7));
3999 return imm7 << ImmHint_offset;
4000 }
4001
CRm(int imm4)4002 static Instr CRm(int imm4) {
4003 VIXL_ASSERT(is_uint4(imm4));
4004 return imm4 << CRm_offset;
4005 }
4006
CRn(int imm4)4007 static Instr CRn(int imm4) {
4008 VIXL_ASSERT(is_uint4(imm4));
4009 return imm4 << CRn_offset;
4010 }
4011
SysOp(int imm14)4012 static Instr SysOp(int imm14) {
4013 VIXL_ASSERT(is_uint14(imm14));
4014 return imm14 << SysOp_offset;
4015 }
4016
ImmSysOp1(int imm3)4017 static Instr ImmSysOp1(int imm3) {
4018 VIXL_ASSERT(is_uint3(imm3));
4019 return imm3 << SysOp1_offset;
4020 }
4021
ImmSysOp2(int imm3)4022 static Instr ImmSysOp2(int imm3) {
4023 VIXL_ASSERT(is_uint3(imm3));
4024 return imm3 << SysOp2_offset;
4025 }
4026
ImmBarrierDomain(int imm2)4027 static Instr ImmBarrierDomain(int imm2) {
4028 VIXL_ASSERT(is_uint2(imm2));
4029 return imm2 << ImmBarrierDomain_offset;
4030 }
4031
ImmBarrierType(int imm2)4032 static Instr ImmBarrierType(int imm2) {
4033 VIXL_ASSERT(is_uint2(imm2));
4034 return imm2 << ImmBarrierType_offset;
4035 }
4036
4037 // Move immediates encoding.
ImmMoveWide(uint64_t imm)4038 static Instr ImmMoveWide(uint64_t imm) {
4039 VIXL_ASSERT(is_uint16(imm));
4040 return static_cast<Instr>(imm << ImmMoveWide_offset);
4041 }
4042
ShiftMoveWide(int64_t shift)4043 static Instr ShiftMoveWide(int64_t shift) {
4044 VIXL_ASSERT(is_uint2(shift));
4045 return static_cast<Instr>(shift << ShiftMoveWide_offset);
4046 }
4047
4048 // FP Immediates.
4049 static Instr ImmFP32(float imm);
4050 static Instr ImmFP64(double imm);
4051
4052 // FP register type.
FPType(FPRegister fd)4053 static Instr FPType(FPRegister fd) {
4054 return fd.Is64Bits() ? FP64 : FP32;
4055 }
4056
FPScale(unsigned scale)4057 static Instr FPScale(unsigned scale) {
4058 VIXL_ASSERT(is_uint6(scale));
4059 return scale << FPScale_offset;
4060 }
4061
// Immediate field checking helpers. Each predicate is only declared here.
static bool IsImmAddSub(int64_t immediate);
static bool IsImmConditionalCompare(int64_t immediate);
static bool IsImmFP32(float imm);
static bool IsImmFP64(double imm);
// The three pointer parameters are optional and default to NULL.
static bool IsImmLogical(uint64_t value, unsigned width, unsigned* n = NULL,
                         unsigned* imm_s = NULL, unsigned* imm_r = NULL);
static bool IsImmLSPair(int64_t offset, unsigned access_size);
static bool IsImmLSScaled(int64_t offset, unsigned access_size);
static bool IsImmLSUnscaled(int64_t offset);
static bool IsImmMovn(uint64_t imm, unsigned reg_size);
static bool IsImmMovz(uint64_t imm, unsigned reg_size);
4077
4078 // Instruction bits for vector format in data processing operations.
VFormat(VRegister vd)4079 static Instr VFormat(VRegister vd) {
4080 if (vd.Is64Bits()) {
4081 switch (vd.lanes()) {
4082 case 2: return NEON_2S;
4083 case 4: return NEON_4H;
4084 case 8: return NEON_8B;
4085 default: return 0xffffffff;
4086 }
4087 } else {
4088 VIXL_ASSERT(vd.Is128Bits());
4089 switch (vd.lanes()) {
4090 case 2: return NEON_2D;
4091 case 4: return NEON_4S;
4092 case 8: return NEON_8H;
4093 case 16: return NEON_16B;
4094 default: return 0xffffffff;
4095 }
4096 }
4097 }
4098
4099 // Instruction bits for vector format in floating point data processing
4100 // operations.
FPFormat(VRegister vd)4101 static Instr FPFormat(VRegister vd) {
4102 if (vd.lanes() == 1) {
4103 // Floating point scalar formats.
4104 VIXL_ASSERT(vd.Is32Bits() || vd.Is64Bits());
4105 return vd.Is64Bits() ? FP64 : FP32;
4106 }
4107
4108 // Two lane floating point vector formats.
4109 if (vd.lanes() == 2) {
4110 VIXL_ASSERT(vd.Is64Bits() || vd.Is128Bits());
4111 return vd.Is128Bits() ? NEON_FP_2D : NEON_FP_2S;
4112 }
4113
4114 // Four lane floating point vector format.
4115 VIXL_ASSERT((vd.lanes() == 4) && vd.Is128Bits());
4116 return NEON_FP_4S;
4117 }
4118
4119 // Instruction bits for vector format in load and store operations.
LSVFormat(VRegister vd)4120 static Instr LSVFormat(VRegister vd) {
4121 if (vd.Is64Bits()) {
4122 switch (vd.lanes()) {
4123 case 1: return LS_NEON_1D;
4124 case 2: return LS_NEON_2S;
4125 case 4: return LS_NEON_4H;
4126 case 8: return LS_NEON_8B;
4127 default: return 0xffffffff;
4128 }
4129 } else {
4130 VIXL_ASSERT(vd.Is128Bits());
4131 switch (vd.lanes()) {
4132 case 2: return LS_NEON_2D;
4133 case 4: return LS_NEON_4S;
4134 case 8: return LS_NEON_8H;
4135 case 16: return LS_NEON_16B;
4136 default: return 0xffffffff;
4137 }
4138 }
4139 }
4140
4141 // Instruction bits for scalar format in data processing operations.
SFormat(VRegister vd)4142 static Instr SFormat(VRegister vd) {
4143 VIXL_ASSERT(vd.lanes() == 1);
4144 switch (vd.SizeInBytes()) {
4145 case 1: return NEON_B;
4146 case 2: return NEON_H;
4147 case 4: return NEON_S;
4148 case 8: return NEON_D;
4149 default: return 0xffffffff;
4150 }
4151 }
4152
ImmNEONHLM(int index,int num_bits)4153 static Instr ImmNEONHLM(int index, int num_bits) {
4154 int h, l, m;
4155 if (num_bits == 3) {
4156 VIXL_ASSERT(is_uint3(index));
4157 h = (index >> 2) & 1;
4158 l = (index >> 1) & 1;
4159 m = (index >> 0) & 1;
4160 } else if (num_bits == 2) {
4161 VIXL_ASSERT(is_uint2(index));
4162 h = (index >> 1) & 1;
4163 l = (index >> 0) & 1;
4164 m = 0;
4165 } else {
4166 VIXL_ASSERT(is_uint1(index) && (num_bits == 1));
4167 h = (index >> 0) & 1;
4168 l = 0;
4169 m = 0;
4170 }
4171 return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
4172 }
4173
ImmNEONExt(int imm4)4174 static Instr ImmNEONExt(int imm4) {
4175 VIXL_ASSERT(is_uint4(imm4));
4176 return imm4 << ImmNEONExt_offset;
4177 }
4178
ImmNEON5(Instr format,int index)4179 static Instr ImmNEON5(Instr format, int index) {
4180 VIXL_ASSERT(is_uint4(index));
4181 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
4182 int imm5 = (index << (s + 1)) | (1 << s);
4183 return imm5 << ImmNEON5_offset;
4184 }
4185
ImmNEON4(Instr format,int index)4186 static Instr ImmNEON4(Instr format, int index) {
4187 VIXL_ASSERT(is_uint4(index));
4188 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
4189 int imm4 = index << s;
4190 return imm4 << ImmNEON4_offset;
4191 }
4192
ImmNEONabcdefgh(int imm8)4193 static Instr ImmNEONabcdefgh(int imm8) {
4194 VIXL_ASSERT(is_uint8(imm8));
4195 Instr instr;
4196 instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
4197 instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
4198 return instr;
4199 }
4200
NEONCmode(int cmode)4201 static Instr NEONCmode(int cmode) {
4202 VIXL_ASSERT(is_uint4(cmode));
4203 return cmode << NEONCmode_offset;
4204 }
4205
NEONModImmOp(int op)4206 static Instr NEONModImmOp(int op) {
4207 VIXL_ASSERT(is_uint1(op));
4208 return op << NEONModImmOp_offset;
4209 }
4210
4211 // Size of the code generated since label to the current position.
SizeOfCodeGeneratedSince(Label * label)4212 size_t SizeOfCodeGeneratedSince(Label* label) const {
4213 VIXL_ASSERT(label->IsBound());
4214 return buffer_->OffsetFrom(label->location());
4215 }
4216
SizeOfCodeGenerated()4217 size_t SizeOfCodeGenerated() const {
4218 return buffer_->CursorOffset();
4219 }
4220
BufferCapacity()4221 size_t BufferCapacity() const { return buffer_->capacity(); }
4222
RemainingBufferSpace()4223 size_t RemainingBufferSpace() const { return buffer_->RemainingBytes(); }
4224
EnsureSpaceFor(size_t amount)4225 void EnsureSpaceFor(size_t amount) {
4226 if (buffer_->RemainingBytes() < amount) {
4227 size_t capacity = buffer_->capacity();
4228 size_t size = buffer_->CursorOffset();
4229 do {
4230 // TODO(all): refine.
4231 capacity *= 2;
4232 } while ((capacity - size) < amount);
4233 buffer_->Grow(capacity);
4234 }
4235 }
4236
4237 #ifdef VIXL_DEBUG
AcquireBuffer()4238 void AcquireBuffer() {
4239 VIXL_ASSERT(buffer_monitor_ >= 0);
4240 buffer_monitor_++;
4241 }
4242
ReleaseBuffer()4243 void ReleaseBuffer() {
4244 buffer_monitor_--;
4245 VIXL_ASSERT(buffer_monitor_ >= 0);
4246 }
4247 #endif
4248
pic()4249 PositionIndependentCodeOption pic() const {
4250 return pic_;
4251 }
4252
AllowPageOffsetDependentCode()4253 bool AllowPageOffsetDependentCode() const {
4254 return (pic() == PageOffsetDependentCode) ||
4255 (pic() == PositionDependentCode);
4256 }
4257
AppropriateZeroRegFor(const CPURegister & reg)4258 static const Register& AppropriateZeroRegFor(const CPURegister& reg) {
4259 return reg.Is64Bits() ? xzr : wzr;
4260 }
4261
4262
4263 protected:
4264 void LoadStore(const CPURegister& rt,
4265 const MemOperand& addr,
4266 LoadStoreOp op,
4267 LoadStoreScalingOption option = PreferScaledOffset);
4268
4269 void LoadStorePair(const CPURegister& rt,
4270 const CPURegister& rt2,
4271 const MemOperand& addr,
4272 LoadStorePairOp op);
4273 void LoadStoreStruct(const VRegister& vt,
4274 const MemOperand& addr,
4275 NEONLoadStoreMultiStructOp op);
4276 void LoadStoreStruct1(const VRegister& vt,
4277 int reg_count,
4278 const MemOperand& addr);
4279 void LoadStoreStructSingle(const VRegister& vt,
4280 uint32_t lane,
4281 const MemOperand& addr,
4282 NEONLoadStoreSingleStructOp op);
4283 void LoadStoreStructSingleAllLanes(const VRegister& vt,
4284 const MemOperand& addr,
4285 NEONLoadStoreSingleStructOp op);
4286 void LoadStoreStructVerify(const VRegister& vt,
4287 const MemOperand& addr,
4288 Instr op);
4289
4290 void Prefetch(PrefetchOperation op,
4291 const MemOperand& addr,
4292 LoadStoreScalingOption option = PreferScaledOffset);
4293
4294 // TODO(all): The third parameter should be passed by reference but gcc 4.8.2
4295 // reports a bogus uninitialised warning then.
4296 void Logical(const Register& rd,
4297 const Register& rn,
4298 const Operand operand,
4299 LogicalOp op);
4300 void LogicalImmediate(const Register& rd,
4301 const Register& rn,
4302 unsigned n,
4303 unsigned imm_s,
4304 unsigned imm_r,
4305 LogicalOp op);
4306
4307 void ConditionalCompare(const Register& rn,
4308 const Operand& operand,
4309 StatusFlags nzcv,
4310 Condition cond,
4311 ConditionalCompareOp op);
4312
4313 void AddSubWithCarry(const Register& rd,
4314 const Register& rn,
4315 const Operand& operand,
4316 FlagsUpdate S,
4317 AddSubWithCarryOp op);
4318
4319
4320 // Functions for emulating operands not directly supported by the instruction
4321 // set.
4322 void EmitShift(const Register& rd,
4323 const Register& rn,
4324 Shift shift,
4325 unsigned amount);
4326 void EmitExtendShift(const Register& rd,
4327 const Register& rn,
4328 Extend extend,
4329 unsigned left_shift);
4330
4331 void AddSub(const Register& rd,
4332 const Register& rn,
4333 const Operand& operand,
4334 FlagsUpdate S,
4335 AddSubOp op);
4336
4337 void NEONTable(const VRegister& vd,
4338 const VRegister& vn,
4339 const VRegister& vm,
4340 NEONTableOp op);
4341
4342 // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
4343 // registers. Only simple loads are supported; sign- and zero-extension (such
4344 // as in LDPSW_x or LDRB_w) are not supported.
4345 static LoadStoreOp LoadOpFor(const CPURegister& rt);
4346 static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
4347 const CPURegister& rt2);
4348 static LoadStoreOp StoreOpFor(const CPURegister& rt);
4349 static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
4350 const CPURegister& rt2);
4351 static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
4352 const CPURegister& rt, const CPURegister& rt2);
4353 static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
4354 const CPURegister& rt, const CPURegister& rt2);
4355 static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);
4356
4357
4358 private:
// Conversions from a floating-point value to a 32-bit unsigned result, one
// per precision. Only the declarations appear here.
static uint32_t FP32ToImm8(float imm);
static uint32_t FP64ToImm8(double imm);
4361
4362 // Instruction helpers.
4363 void MoveWide(const Register& rd,
4364 uint64_t imm,
4365 int shift,
4366 MoveWideImmediateOp mov_op);
4367 void DataProcShiftedRegister(const Register& rd,
4368 const Register& rn,
4369 const Operand& operand,
4370 FlagsUpdate S,
4371 Instr op);
4372 void DataProcExtendedRegister(const Register& rd,
4373 const Register& rn,
4374 const Operand& operand,
4375 FlagsUpdate S,
4376 Instr op);
4377 void LoadStorePairNonTemporal(const CPURegister& rt,
4378 const CPURegister& rt2,
4379 const MemOperand& addr,
4380 LoadStorePairNonTemporalOp op);
4381 void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
4382 void ConditionalSelect(const Register& rd,
4383 const Register& rn,
4384 const Register& rm,
4385 Condition cond,
4386 ConditionalSelectOp op);
4387 void DataProcessing1Source(const Register& rd,
4388 const Register& rn,
4389 DataProcessing1SourceOp op);
4390 void DataProcessing3Source(const Register& rd,
4391 const Register& rn,
4392 const Register& rm,
4393 const Register& ra,
4394 DataProcessing3SourceOp op);
4395 void FPDataProcessing1Source(const VRegister& fd,
4396 const VRegister& fn,
4397 FPDataProcessing1SourceOp op);
4398 void FPDataProcessing3Source(const VRegister& fd,
4399 const VRegister& fn,
4400 const VRegister& fm,
4401 const VRegister& fa,
4402 FPDataProcessing3SourceOp op);
4403 void NEONAcrossLanesL(const VRegister& vd,
4404 const VRegister& vn,
4405 NEONAcrossLanesOp op);
4406 void NEONAcrossLanes(const VRegister& vd,
4407 const VRegister& vn,
4408 NEONAcrossLanesOp op);
4409 void NEONModifiedImmShiftLsl(const VRegister& vd,
4410 const int imm8,
4411 const int left_shift,
4412 NEONModifiedImmediateOp op);
4413 void NEONModifiedImmShiftMsl(const VRegister& vd,
4414 const int imm8,
4415 const int shift_amount,
4416 NEONModifiedImmediateOp op);
4417 void NEONFP2Same(const VRegister& vd,
4418 const VRegister& vn,
4419 Instr vop);
4420 void NEON3Same(const VRegister& vd,
4421 const VRegister& vn,
4422 const VRegister& vm,
4423 NEON3SameOp vop);
4424 void NEONFP3Same(const VRegister& vd,
4425 const VRegister& vn,
4426 const VRegister& vm,
4427 Instr op);
4428 void NEON3DifferentL(const VRegister& vd,
4429 const VRegister& vn,
4430 const VRegister& vm,
4431 NEON3DifferentOp vop);
4432 void NEON3DifferentW(const VRegister& vd,
4433 const VRegister& vn,
4434 const VRegister& vm,
4435 NEON3DifferentOp vop);
4436 void NEON3DifferentHN(const VRegister& vd,
4437 const VRegister& vn,
4438 const VRegister& vm,
4439 NEON3DifferentOp vop);
4440 void NEONFP2RegMisc(const VRegister& vd,
4441 const VRegister& vn,
4442 NEON2RegMiscOp vop,
4443 double value = 0.0);
4444 void NEON2RegMisc(const VRegister& vd,
4445 const VRegister& vn,
4446 NEON2RegMiscOp vop,
4447 int value = 0);
4448 void NEONFP2RegMisc(const VRegister& vd,
4449 const VRegister& vn,
4450 Instr op);
4451 void NEONAddlp(const VRegister& vd,
4452 const VRegister& vn,
4453 NEON2RegMiscOp op);
4454 void NEONPerm(const VRegister& vd,
4455 const VRegister& vn,
4456 const VRegister& vm,
4457 NEONPermOp op);
4458 void NEONFPByElement(const VRegister& vd,
4459 const VRegister& vn,
4460 const VRegister& vm,
4461 int vm_index,
4462 NEONByIndexedElementOp op);
4463 void NEONByElement(const VRegister& vd,
4464 const VRegister& vn,
4465 const VRegister& vm,
4466 int vm_index,
4467 NEONByIndexedElementOp op);
4468 void NEONByElementL(const VRegister& vd,
4469 const VRegister& vn,
4470 const VRegister& vm,
4471 int vm_index,
4472 NEONByIndexedElementOp op);
4473 void NEONShiftImmediate(const VRegister& vd,
4474 const VRegister& vn,
4475 NEONShiftImmediateOp op,
4476 int immh_immb);
4477 void NEONShiftLeftImmediate(const VRegister& vd,
4478 const VRegister& vn,
4479 int shift,
4480 NEONShiftImmediateOp op);
4481 void NEONShiftRightImmediate(const VRegister& vd,
4482 const VRegister& vn,
4483 int shift,
4484 NEONShiftImmediateOp op);
4485 void NEONShiftImmediateL(const VRegister& vd,
4486 const VRegister& vn,
4487 int shift,
4488 NEONShiftImmediateOp op);
4489 void NEONShiftImmediateN(const VRegister& vd,
4490 const VRegister& vn,
4491 int shift,
4492 NEONShiftImmediateOp op);
4493 void NEONXtn(const VRegister& vd,
4494 const VRegister& vn,
4495 NEON2RegMiscOp vop);
4496
4497 Instr LoadStoreStructAddrModeField(const MemOperand& addr);
4498
4499 // Encode the specified MemOperand for the specified access size and scaling
4500 // preference.
4501 Instr LoadStoreMemOperand(const MemOperand& addr,
4502 unsigned access_size,
4503 LoadStoreScalingOption option);
4504
4505 // Link the current (not-yet-emitted) instruction to the specified label, then
4506 // return an offset to be encoded in the instruction. If the label is not yet
4507 // bound, an offset of 0 is returned.
4508 ptrdiff_t LinkAndGetByteOffsetTo(Label * label);
4509 ptrdiff_t LinkAndGetInstructionOffsetTo(Label * label);
4510 ptrdiff_t LinkAndGetPageOffsetTo(Label * label);
4511
4512 // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
4513 template <int element_shift>
4514 ptrdiff_t LinkAndGetOffsetTo(Label* label);
4515
4516 // Literal load offset are in words (32-bit).
4517 ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);
4518
4519 // Emit the instruction in buffer_.
Emit(Instr instruction)4520 void Emit(Instr instruction) {
4521 VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
4522 VIXL_ASSERT(buffer_monitor_ > 0);
4523 buffer_->Emit32(instruction);
4524 }
4525
4526 // Buffer where the code is emitted.
4527 CodeBuffer* buffer_;
4528 PositionIndependentCodeOption pic_;
4529
4530 #ifdef VIXL_DEBUG
4531 int64_t buffer_monitor_;
4532 #endif
4533 };
4534
4535
4536 // All Assembler emits MUST acquire/release the underlying code buffer. The
4537 // helper scope below will do so and optionally ensure the buffer is big enough
4538 // to receive the emit. It is possible to request the scope not to perform any
4539 // checks (kNoCheck) if for example it is known in advance the buffer size is
4540 // adequate or there is some other size checking mechanism in place.
4541 class CodeBufferCheckScope {
4542 public:
4543 // Tell whether or not the scope needs to ensure the associated CodeBuffer
4544 // has enough space for the requested size.
4545 enum CheckPolicy {
4546 kNoCheck,
4547 kCheck
4548 };
4549
4550 // Tell whether or not the scope should assert the amount of code emitted
4551 // within the scope is consistent with the requested amount.
4552 enum AssertPolicy {
4553 kNoAssert, // No assert required.
4554 kExactSize, // The code emitted must be exactly size bytes.
4555 kMaximumSize // The code emitted must be at most size bytes.
4556 };
4557
4558 CodeBufferCheckScope(Assembler* assm,
4559 size_t size,
4560 CheckPolicy check_policy = kCheck,
4561 AssertPolicy assert_policy = kMaximumSize)
assm_(assm)4562 : assm_(assm) {
4563 if (check_policy == kCheck) assm->EnsureSpaceFor(size);
4564 #ifdef VIXL_DEBUG
4565 assm->bind(&start_);
4566 size_ = size;
4567 assert_policy_ = assert_policy;
4568 assm->AcquireBuffer();
4569 #else
4570 USE(assert_policy);
4571 #endif
4572 }
4573
4574 // This is a shortcut for CodeBufferCheckScope(assm, 0, kNoCheck, kNoAssert).
CodeBufferCheckScope(Assembler * assm)4575 explicit CodeBufferCheckScope(Assembler* assm) : assm_(assm) {
4576 #ifdef VIXL_DEBUG
4577 size_ = 0;
4578 assert_policy_ = kNoAssert;
4579 assm->AcquireBuffer();
4580 #endif
4581 }
4582
~CodeBufferCheckScope()4583 ~CodeBufferCheckScope() {
4584 #ifdef VIXL_DEBUG
4585 assm_->ReleaseBuffer();
4586 switch (assert_policy_) {
4587 case kNoAssert: break;
4588 case kExactSize:
4589 VIXL_ASSERT(assm_->SizeOfCodeGeneratedSince(&start_) == size_);
4590 break;
4591 case kMaximumSize:
4592 VIXL_ASSERT(assm_->SizeOfCodeGeneratedSince(&start_) <= size_);
4593 break;
4594 default:
4595 VIXL_UNREACHABLE();
4596 }
4597 #endif
4598 }
4599
4600 protected:
4601 Assembler* assm_;
4602 #ifdef VIXL_DEBUG
4603 Label start_;
4604 size_t size_;
4605 AssertPolicy assert_policy_;
4606 #endif
4607 };
4608
4609
4610 template <typename T>
UpdateValue(T new_value,const Assembler * assembler)4611 void Literal<T>::UpdateValue(T new_value, const Assembler* assembler) {
4612 return UpdateValue(new_value, assembler->GetStartAddress<uint8_t*>());
4613 }
4614
4615
4616 template <typename T>
UpdateValue(T high64,T low64,const Assembler * assembler)4617 void Literal<T>::UpdateValue(T high64, T low64, const Assembler* assembler) {
4618 return UpdateValue(high64, low64, assembler->GetStartAddress<uint8_t*>());
4619 }
4620
4621
4622 } // namespace vixl
4623
4624 #endif // VIXL_A64_ASSEMBLER_A64_H_
4625