1 // Copyright 2015, ARM Limited
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #ifndef VIXL_A64_SIMULATOR_A64_H_
28 #define VIXL_A64_SIMULATOR_A64_H_
29
30 #include "vixl/globals.h"
31 #include "vixl/utils.h"
32 #include "vixl/a64/instructions-a64.h"
33 #include "vixl/a64/assembler-a64.h"
34 #include "vixl/a64/disasm-a64.h"
35 #include "vixl/a64/instrument-a64.h"
36
37 namespace vixl {
38
39 // Debug instructions.
40 //
41 // VIXL's macro-assembler and simulator support a few pseudo instructions to
42 // make debugging easier. These pseudo instructions do not exist on real
43 // hardware.
44 //
45 // TODO: Provide controls to prevent the macro assembler from emitting
46 // pseudo-instructions. This is important for ahead-of-time compilers, where the
47 // macro assembler is built with USE_SIMULATOR but the code will eventually be
48 // run on real hardware.
49 //
// TODO: Also consider allowing these pseudo-instructions to be disabled in the
// simulator, so that users can check that the input is valid native code.
// (This isn't possible in all cases. Printf won't work, for example.)
53 //
54 // Each debug pseudo instruction is represented by a HLT instruction. The HLT
55 // immediate field is used to identify the type of debug pseudo instruction.
56
// HLT immediates that identify each debug pseudo instruction. The values are
// contiguous, so a range check against the First/Last aliases is enough to
// recognise any of them.
enum DebugHltOpcodes {
  kUnreachableOpcode = 0xdeb0,  // Code that should never be executed.
  kPrintfOpcode = 0xdeb1,       // Simulated call to printf.
  kTraceOpcode = 0xdeb2,        // Trace management.
  kLogOpcode = 0xdeb3,          // One-off logging.

  // Inclusive bounds of the opcode range above.
  kDebugHltFirstOpcode = kUnreachableOpcode,
  kDebugHltLastOpcode = kLogOpcode
};
66
67 // Each pseudo instruction uses a custom encoding for additional arguments, as
68 // described below.
69
70 // Unreachable - kUnreachableOpcode
71 //
72 // Instruction which should never be executed. This is used as a guard in parts
73 // of the code that should not be reachable, such as in data encoded inline in
74 // the instructions.
75
76 // Printf - kPrintfOpcode
77 // - arg_count: The number of arguments.
78 // - arg_pattern: A set of PrintfArgPattern values, packed into two-bit fields.
79 //
80 // Simulate a call to printf.
81 //
82 // Floating-point and integer arguments are passed in separate sets of registers
83 // in AAPCS64 (even for varargs functions), so it is not possible to determine
84 // the type of each argument without some information about the values that were
85 // passed in. This information could be retrieved from the printf format string,
86 // but the format string is not trivial to parse so we encode the relevant
87 // information with the HLT instruction.
88 //
89 // Also, the following registers are populated (as if for a native A64 call):
90 // x0: The format string
91 // x1-x7: Optional arguments, if type == CPURegister::kRegister
92 // d0-d7: Optional arguments, if type == CPURegister::kFPRegister
93 const unsigned kPrintfArgCountOffset = 1 * kInstructionSize;
94 const unsigned kPrintfArgPatternListOffset = 2 * kInstructionSize;
95 const unsigned kPrintfLength = 3 * kInstructionSize;
96
97 const unsigned kPrintfMaxArgCount = 4;
98
99 // The argument pattern is a set of two-bit-fields, each with one of the
100 // following values:
enum PrintfArgPattern {
  kPrintfArgW = 0x1,
  kPrintfArgX = 0x2,
  // A kPrintfArgS value is deliberately absent: C varargs calls always convert
  // floats to doubles.
  kPrintfArgD = 0x3
};

// Width, in bits, of each packed PrintfArgPattern field.
const unsigned kPrintfArgPatternBits = 2;
109
110 // Trace - kTraceOpcode
// - parameter: TraceParameters stored as a uint32_t
112 // - command: TraceCommand stored as a uint32_t
113 //
114 // Allow for trace management in the generated code. This enables or disables
115 // automatic tracing of the specified information for every simulated
116 // instruction.
117 const unsigned kTraceParamsOffset = 1 * kInstructionSize;
118 const unsigned kTraceCommandOffset = 2 * kInstructionSize;
119 const unsigned kTraceLength = 3 * kInstructionSize;
120
121 // Trace parameters.
// Bit flags selecting what is traced or logged; values can be OR-ed together.
enum TraceParameters {
  LOG_NONE = 0x00,

  // Individual categories, one bit each.
  LOG_DISASM = 0x01,   // Log disassembly.
  LOG_REGS = 0x02,     // Log general purpose registers.
  LOG_VREGS = 0x04,    // Log NEON and floating-point registers.
  LOG_SYSREGS = 0x08,  // Log the flags and system registers.
  LOG_WRITE = 0x10,    // Log writes to memory.

  // Convenience combinations.
  LOG_STATE = LOG_REGS | LOG_VREGS | LOG_SYSREGS,
  LOG_ALL = LOG_DISASM | LOG_STATE | LOG_WRITE
};
133
134 // Trace commands.
// Commands understood by the Trace pseudo instruction.
enum TraceCommand {
  TRACE_ENABLE = 0x1,  // Start tracing the selected parameters.
  TRACE_DISABLE = 0x2  // Stop tracing the selected parameters.
};
139
140 // Log - kLogOpcode
// - parameter: TraceParameters stored as a uint32_t
142 //
143 // Print the specified information once. This mechanism is separate from Trace.
144 // In particular, _all_ of the specified registers are printed, rather than just
145 // the registers that the instruction writes.
146 //
147 // Any combination of the TraceParameters values can be used, except that
148 // LOG_DISASM is not supported for Log.
149 const unsigned kLogParamsOffset = 1 * kInstructionSize;
150 const unsigned kLogLength = 2 * kInstructionSize;
151
152
153 // Assemble the specified IEEE-754 components into the target type and apply
154 // appropriate rounding.
155 // sign: 0 = positive, 1 = negative
156 // exponent: Unbiased IEEE-754 exponent.
157 // mantissa: The mantissa of the input. The top bit (which is not encoded for
158 // normal IEEE-754 values) must not be omitted. This bit has the
159 // value 'pow(2, exponent)'.
160 //
161 // The input value is assumed to be a normalized value. That is, the input may
162 // not be infinity or NaN. If the source value is subnormal, it must be
163 // normalized before calling this function such that the highest set bit in the
164 // mantissa has the value 'pow(2, exponent)'.
165 //
166 // Callers should use FPRoundToFloat or FPRoundToDouble directly, rather than
167 // calling a templated FPRound.
168 template <class T, int ebits, int mbits>
FPRound(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)169 T FPRound(int64_t sign, int64_t exponent, uint64_t mantissa,
170 FPRounding round_mode) {
171 VIXL_ASSERT((sign == 0) || (sign == 1));
172
173 // Only FPTieEven and FPRoundOdd rounding modes are implemented.
174 VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
175
176 // Rounding can promote subnormals to normals, and normals to infinities. For
177 // example, a double with exponent 127 (FLT_MAX_EXP) would appear to be
178 // encodable as a float, but rounding based on the low-order mantissa bits
179 // could make it overflow. With ties-to-even rounding, this value would become
180 // an infinity.
181
182 // ---- Rounding Method ----
183 //
184 // The exponent is irrelevant in the rounding operation, so we treat the
185 // lowest-order bit that will fit into the result ('onebit') as having
186 // the value '1'. Similarly, the highest-order bit that won't fit into
187 // the result ('halfbit') has the value '0.5'. The 'point' sits between
188 // 'onebit' and 'halfbit':
189 //
190 // These bits fit into the result.
191 // |---------------------|
192 // mantissa = 0bxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
193 // ||
194 // / |
195 // / halfbit
196 // onebit
197 //
198 // For subnormal outputs, the range of representable bits is smaller and
199 // the position of onebit and halfbit depends on the exponent of the
200 // input, but the method is otherwise similar.
201 //
202 // onebit(frac)
203 // |
204 // | halfbit(frac) halfbit(adjusted)
205 // | / /
206 // | | |
207 // 0b00.0 (exact) -> 0b00.0 (exact) -> 0b00
208 // 0b00.0... -> 0b00.0... -> 0b00
209 // 0b00.1 (exact) -> 0b00.0111..111 -> 0b00
210 // 0b00.1... -> 0b00.1... -> 0b01
211 // 0b01.0 (exact) -> 0b01.0 (exact) -> 0b01
212 // 0b01.0... -> 0b01.0... -> 0b01
213 // 0b01.1 (exact) -> 0b01.1 (exact) -> 0b10
214 // 0b01.1... -> 0b01.1... -> 0b10
215 // 0b10.0 (exact) -> 0b10.0 (exact) -> 0b10
216 // 0b10.0... -> 0b10.0... -> 0b10
217 // 0b10.1 (exact) -> 0b10.0111..111 -> 0b10
218 // 0b10.1... -> 0b10.1... -> 0b11
219 // 0b11.0 (exact) -> 0b11.0 (exact) -> 0b11
220 // ... / | / |
221 // / | / |
222 // / |
223 // adjusted = frac - (halfbit(mantissa) & ~onebit(frac)); / |
224 //
225 // mantissa = (mantissa >> shift) + halfbit(adjusted);
226
227 static const int mantissa_offset = 0;
228 static const int exponent_offset = mantissa_offset + mbits;
229 static const int sign_offset = exponent_offset + ebits;
230 VIXL_ASSERT(sign_offset == (sizeof(T) * 8 - 1));
231
232 // Bail out early for zero inputs.
233 if (mantissa == 0) {
234 return sign << sign_offset;
235 }
236
237 // If all bits in the exponent are set, the value is infinite or NaN.
238 // This is true for all binary IEEE-754 formats.
239 static const int infinite_exponent = (1 << ebits) - 1;
240 static const int max_normal_exponent = infinite_exponent - 1;
241
242 // Apply the exponent bias to encode it for the result. Doing this early makes
243 // it easy to detect values that will be infinite or subnormal.
244 exponent += max_normal_exponent >> 1;
245
246 if (exponent > max_normal_exponent) {
247 // Overflow: the input is too large for the result type to represent.
248 if (round_mode == FPTieEven) {
249 // FPTieEven rounding mode handles overflows using infinities.
250 exponent = infinite_exponent;
251 mantissa = 0;
252 } else {
253 VIXL_ASSERT(round_mode == FPRoundOdd);
254 // FPRoundOdd rounding mode handles overflows using the largest magnitude
255 // normal number.
256 exponent = max_normal_exponent;
257 mantissa = (UINT64_C(1) << exponent_offset) - 1;
258 }
259 return (sign << sign_offset) |
260 (exponent << exponent_offset) |
261 (mantissa << mantissa_offset);
262 }
263
264 // Calculate the shift required to move the top mantissa bit to the proper
265 // place in the destination type.
266 const int highest_significant_bit = 63 - CountLeadingZeros(mantissa);
267 int shift = highest_significant_bit - mbits;
268
269 if (exponent <= 0) {
270 // The output will be subnormal (before rounding).
271 // For subnormal outputs, the shift must be adjusted by the exponent. The +1
272 // is necessary because the exponent of a subnormal value (encoded as 0) is
273 // the same as the exponent of the smallest normal value (encoded as 1).
274 shift += -exponent + 1;
275
276 // Handle inputs that would produce a zero output.
277 //
278 // Shifts higher than highest_significant_bit+1 will always produce a zero
279 // result. A shift of exactly highest_significant_bit+1 might produce a
280 // non-zero result after rounding.
281 if (shift > (highest_significant_bit + 1)) {
282 if (round_mode == FPTieEven) {
283 // The result will always be +/-0.0.
284 return sign << sign_offset;
285 } else {
286 VIXL_ASSERT(round_mode == FPRoundOdd);
287 VIXL_ASSERT(mantissa != 0);
288 // For FPRoundOdd, if the mantissa is too small to represent and
289 // non-zero return the next "odd" value.
290 return (sign << sign_offset) | 1;
291 }
292 }
293
294 // Properly encode the exponent for a subnormal output.
295 exponent = 0;
296 } else {
297 // Clear the topmost mantissa bit, since this is not encoded in IEEE-754
298 // normal values.
299 mantissa &= ~(UINT64_C(1) << highest_significant_bit);
300 }
301
302 if (shift > 0) {
303 if (round_mode == FPTieEven) {
304 // We have to shift the mantissa to the right. Some precision is lost, so
305 // we need to apply rounding.
306 uint64_t onebit_mantissa = (mantissa >> (shift)) & 1;
307 uint64_t halfbit_mantissa = (mantissa >> (shift-1)) & 1;
308 uint64_t adjustment = (halfbit_mantissa & ~onebit_mantissa);
309 uint64_t adjusted = mantissa - adjustment;
310 T halfbit_adjusted = (adjusted >> (shift-1)) & 1;
311
312 T result = (sign << sign_offset) |
313 (exponent << exponent_offset) |
314 ((mantissa >> shift) << mantissa_offset);
315
316 // A very large mantissa can overflow during rounding. If this happens,
317 // the exponent should be incremented and the mantissa set to 1.0
318 // (encoded as 0). Applying halfbit_adjusted after assembling the float
319 // has the nice side-effect that this case is handled for free.
320 //
321 // This also handles cases where a very large finite value overflows to
322 // infinity, or where a very large subnormal value overflows to become
323 // normal.
324 return result + halfbit_adjusted;
325 } else {
326 VIXL_ASSERT(round_mode == FPRoundOdd);
327 // If any bits at position halfbit or below are set, onebit (ie. the
328 // bottom bit of the resulting mantissa) must be set.
329 uint64_t fractional_bits = mantissa & ((UINT64_C(1) << shift) - 1);
330 if (fractional_bits != 0) {
331 mantissa |= UINT64_C(1) << shift;
332 }
333
334 return (sign << sign_offset) |
335 (exponent << exponent_offset) |
336 ((mantissa >> shift) << mantissa_offset);
337 }
338 } else {
339 // We have to shift the mantissa to the left (or not at all). The input
340 // mantissa is exactly representable in the output mantissa, so apply no
341 // rounding correction.
342 return (sign << sign_offset) |
343 (exponent << exponent_offset) |
344 ((mantissa << -shift) << mantissa_offset);
345 }
346 }
347
348
349 // Representation of memory, with typed getters and setters for access.
350 class Memory {
351 public:
352 template <typename T>
AddressUntag(T address)353 static T AddressUntag(T address) {
354 // Cast the address using a C-style cast. A reinterpret_cast would be
355 // appropriate, but it can't cast one integral type to another.
356 uint64_t bits = (uint64_t)address;
357 return (T)(bits & ~kAddressTagMask);
358 }
359
360 template <typename T, typename A>
Read(A address)361 static T Read(A address) {
362 T value;
363 address = AddressUntag(address);
364 VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
365 (sizeof(value) == 4) || (sizeof(value) == 8) ||
366 (sizeof(value) == 16));
367 memcpy(&value, reinterpret_cast<const char *>(address), sizeof(value));
368 return value;
369 }
370
371 template <typename T, typename A>
Write(A address,T value)372 static void Write(A address, T value) {
373 address = AddressUntag(address);
374 VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
375 (sizeof(value) == 4) || (sizeof(value) == 8) ||
376 (sizeof(value) == 16));
377 memcpy(reinterpret_cast<char *>(address), &value, sizeof(value));
378 }
379 };
380
381 // Represent a register (r0-r31, v0-v31).
382 template<int kSizeInBytes>
383 class SimRegisterBase {
384 public:
SimRegisterBase()385 SimRegisterBase() : written_since_last_log_(false) {}
386
387 // Write the specified value. The value is zero-extended if necessary.
388 template<typename T>
Set(T new_value)389 void Set(T new_value) {
390 VIXL_STATIC_ASSERT(sizeof(new_value) <= kSizeInBytes);
391 if (sizeof(new_value) < kSizeInBytes) {
392 // All AArch64 registers are zero-extending.
393 memset(value_ + sizeof(new_value), 0, kSizeInBytes - sizeof(new_value));
394 }
395 memcpy(value_, &new_value, sizeof(new_value));
396 NotifyRegisterWrite();
397 }
398
399 // Insert a typed value into a register, leaving the rest of the register
400 // unchanged. The lane parameter indicates where in the register the value
401 // should be inserted, in the range [ 0, sizeof(value_) / sizeof(T) ), where
402 // 0 represents the least significant bits.
403 template<typename T>
Insert(int lane,T new_value)404 void Insert(int lane, T new_value) {
405 VIXL_ASSERT(lane >= 0);
406 VIXL_ASSERT((sizeof(new_value) +
407 (lane * sizeof(new_value))) <= kSizeInBytes);
408 memcpy(&value_[lane * sizeof(new_value)], &new_value, sizeof(new_value));
409 NotifyRegisterWrite();
410 }
411
412 // Read the value as the specified type. The value is truncated if necessary.
413 template<typename T>
414 T Get(int lane = 0) const {
415 T result;
416 VIXL_ASSERT(lane >= 0);
417 VIXL_ASSERT((sizeof(result) + (lane * sizeof(result))) <= kSizeInBytes);
418 memcpy(&result, &value_[lane * sizeof(result)], sizeof(result));
419 return result;
420 }
421
422 // TODO: Make this return a map of updated bytes, so that we can highlight
423 // updated lanes for load-and-insert. (That never happens for scalar code, but
424 // NEON has some instructions that can update individual lanes.)
WrittenSinceLastLog()425 bool WrittenSinceLastLog() const {
426 return written_since_last_log_;
427 }
428
NotifyRegisterLogged()429 void NotifyRegisterLogged() {
430 written_since_last_log_ = false;
431 }
432
433 protected:
434 uint8_t value_[kSizeInBytes];
435
436 // Helpers to aid with register tracing.
437 bool written_since_last_log_;
438
NotifyRegisterWrite()439 void NotifyRegisterWrite() {
440 written_since_last_log_ = true;
441 }
442 };
443 typedef SimRegisterBase<kXRegSizeInBytes> SimRegister; // r0-r31
444 typedef SimRegisterBase<kQRegSizeInBytes> SimVRegister; // v0-v31
445
446 // Representation of a vector register, with typed getters and setters for lanes
447 // and additional information to represent lane state.
448 class LogicVRegister {
449 public:
LogicVRegister(SimVRegister & other)450 inline LogicVRegister(SimVRegister& other) // NOLINT
451 : register_(other) {
452 for (unsigned i = 0; i < sizeof(saturated_) / sizeof(saturated_[0]); i++) {
453 saturated_[i] = kNotSaturated;
454 }
455 for (unsigned i = 0; i < sizeof(round_) / sizeof(round_[0]); i++) {
456 round_[i] = 0;
457 }
458 }
459
Int(VectorFormat vform,int index)460 int64_t Int(VectorFormat vform, int index) const {
461 int64_t element;
462 switch (LaneSizeInBitsFromFormat(vform)) {
463 case 8: element = register_.Get<int8_t>(index); break;
464 case 16: element = register_.Get<int16_t>(index); break;
465 case 32: element = register_.Get<int32_t>(index); break;
466 case 64: element = register_.Get<int64_t>(index); break;
467 default: VIXL_UNREACHABLE(); return 0;
468 }
469 return element;
470 }
471
Uint(VectorFormat vform,int index)472 uint64_t Uint(VectorFormat vform, int index) const {
473 uint64_t element;
474 switch (LaneSizeInBitsFromFormat(vform)) {
475 case 8: element = register_.Get<uint8_t>(index); break;
476 case 16: element = register_.Get<uint16_t>(index); break;
477 case 32: element = register_.Get<uint32_t>(index); break;
478 case 64: element = register_.Get<uint64_t>(index); break;
479 default: VIXL_UNREACHABLE(); return 0;
480 }
481 return element;
482 }
483
IntLeftJustified(VectorFormat vform,int index)484 int64_t IntLeftJustified(VectorFormat vform, int index) const {
485 return Int(vform, index) << (64 - LaneSizeInBitsFromFormat(vform));
486 }
487
UintLeftJustified(VectorFormat vform,int index)488 uint64_t UintLeftJustified(VectorFormat vform, int index) const {
489 return Uint(vform, index) << (64 - LaneSizeInBitsFromFormat(vform));
490 }
491
SetInt(VectorFormat vform,int index,int64_t value)492 void SetInt(VectorFormat vform, int index, int64_t value) const {
493 switch (LaneSizeInBitsFromFormat(vform)) {
494 case 8: register_.Insert(index, static_cast<int8_t>(value)); break;
495 case 16: register_.Insert(index, static_cast<int16_t>(value)); break;
496 case 32: register_.Insert(index, static_cast<int32_t>(value)); break;
497 case 64: register_.Insert(index, static_cast<int64_t>(value)); break;
498 default: VIXL_UNREACHABLE(); return;
499 }
500 }
501
SetUint(VectorFormat vform,int index,uint64_t value)502 void SetUint(VectorFormat vform, int index, uint64_t value) const {
503 switch (LaneSizeInBitsFromFormat(vform)) {
504 case 8: register_.Insert(index, static_cast<uint8_t>(value)); break;
505 case 16: register_.Insert(index, static_cast<uint16_t>(value)); break;
506 case 32: register_.Insert(index, static_cast<uint32_t>(value)); break;
507 case 64: register_.Insert(index, static_cast<uint64_t>(value)); break;
508 default: VIXL_UNREACHABLE(); return;
509 }
510 }
511
ReadUintFromMem(VectorFormat vform,int index,uint64_t addr)512 void ReadUintFromMem(VectorFormat vform, int index, uint64_t addr) const {
513 switch (LaneSizeInBitsFromFormat(vform)) {
514 case 8: register_.Insert(index, Memory::Read<uint8_t>(addr)); break;
515 case 16: register_.Insert(index, Memory::Read<uint16_t>(addr)); break;
516 case 32: register_.Insert(index, Memory::Read<uint32_t>(addr)); break;
517 case 64: register_.Insert(index, Memory::Read<uint64_t>(addr)); break;
518 default: VIXL_UNREACHABLE(); return;
519 }
520 }
521
WriteUintToMem(VectorFormat vform,int index,uint64_t addr)522 void WriteUintToMem(VectorFormat vform, int index, uint64_t addr) const {
523 switch (LaneSizeInBitsFromFormat(vform)) {
524 case 8: Memory::Write<uint8_t>(addr, Uint(vform, index)); break;
525 case 16: Memory::Write<uint16_t>(addr, Uint(vform, index)); break;
526 case 32: Memory::Write<uint32_t>(addr, Uint(vform, index)); break;
527 case 64: Memory::Write<uint64_t>(addr, Uint(vform, index)); break;
528 }
529 }
530
531 template <typename T>
Float(int index)532 T Float(int index) const {
533 return register_.Get<T>(index);
534 }
535
536 template <typename T>
SetFloat(int index,T value)537 void SetFloat(int index, T value) const {
538 register_.Insert(index, value);
539 }
540
541 // When setting a result in a register of size less than Q, the top bits of
542 // the Q register must be cleared.
ClearForWrite(VectorFormat vform)543 void ClearForWrite(VectorFormat vform) const {
544 unsigned size = RegisterSizeInBytesFromFormat(vform);
545 for (unsigned i = size; i < kQRegSizeInBytes; i++) {
546 SetUint(kFormat16B, i, 0);
547 }
548 }
549
550 // Saturation state for each lane of a vector.
551 enum Saturation {
552 kNotSaturated = 0,
553 kSignedSatPositive = 1 << 0,
554 kSignedSatNegative = 1 << 1,
555 kSignedSatMask = kSignedSatPositive | kSignedSatNegative,
556 kSignedSatUndefined = kSignedSatMask,
557 kUnsignedSatPositive = 1 << 2,
558 kUnsignedSatNegative = 1 << 3,
559 kUnsignedSatMask = kUnsignedSatPositive | kUnsignedSatNegative,
560 kUnsignedSatUndefined = kUnsignedSatMask
561 };
562
563 // Getters for saturation state.
GetSignedSaturation(int index)564 Saturation GetSignedSaturation(int index) {
565 return static_cast<Saturation>(saturated_[index] & kSignedSatMask);
566 }
567
GetUnsignedSaturation(int index)568 Saturation GetUnsignedSaturation(int index) {
569 return static_cast<Saturation>(saturated_[index] & kUnsignedSatMask);
570 }
571
572 // Setters for saturation state.
ClearSat(int index)573 void ClearSat(int index) {
574 saturated_[index] = kNotSaturated;
575 }
576
SetSignedSat(int index,bool positive)577 void SetSignedSat(int index, bool positive) {
578 SetSatFlag(index, positive ? kSignedSatPositive : kSignedSatNegative);
579 }
580
SetUnsignedSat(int index,bool positive)581 void SetUnsignedSat(int index, bool positive) {
582 SetSatFlag(index, positive ? kUnsignedSatPositive : kUnsignedSatNegative);
583 }
584
SetSatFlag(int index,Saturation sat)585 void SetSatFlag(int index, Saturation sat) {
586 saturated_[index] = static_cast<Saturation>(saturated_[index] | sat);
587 VIXL_ASSERT((sat & kUnsignedSatMask) != kUnsignedSatUndefined);
588 VIXL_ASSERT((sat & kSignedSatMask) != kSignedSatUndefined);
589 }
590
591 // Saturate lanes of a vector based on saturation state.
SignedSaturate(VectorFormat vform)592 LogicVRegister& SignedSaturate(VectorFormat vform) {
593 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
594 Saturation sat = GetSignedSaturation(i);
595 if (sat == kSignedSatPositive) {
596 SetInt(vform, i, MaxIntFromFormat(vform));
597 } else if (sat == kSignedSatNegative) {
598 SetInt(vform, i, MinIntFromFormat(vform));
599 }
600 }
601 return *this;
602 }
603
UnsignedSaturate(VectorFormat vform)604 LogicVRegister& UnsignedSaturate(VectorFormat vform) {
605 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
606 Saturation sat = GetUnsignedSaturation(i);
607 if (sat == kUnsignedSatPositive) {
608 SetUint(vform, i, MaxUintFromFormat(vform));
609 } else if (sat == kUnsignedSatNegative) {
610 SetUint(vform, i, 0);
611 }
612 }
613 return *this;
614 }
615
616 // Getter for rounding state.
GetRounding(int index)617 bool GetRounding(int index) {
618 return round_[index];
619 }
620
621 // Setter for rounding state.
SetRounding(int index,bool round)622 void SetRounding(int index, bool round) {
623 round_[index] = round;
624 }
625
626 // Round lanes of a vector based on rounding state.
Round(VectorFormat vform)627 LogicVRegister& Round(VectorFormat vform) {
628 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
629 SetInt(vform, i, Int(vform, i) + (GetRounding(i) ? 1 : 0));
630 }
631 return *this;
632 }
633
634 // Unsigned halve lanes of a vector, and use the saturation state to set the
635 // top bit.
Uhalve(VectorFormat vform)636 LogicVRegister& Uhalve(VectorFormat vform) {
637 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
638 uint64_t val = Uint(vform, i);
639 SetRounding(i, (val & 1) == 1);
640 val >>= 1;
641 if (GetUnsignedSaturation(i) != kNotSaturated) {
642 // If the operation causes unsigned saturation, the bit shifted into the
643 // most significant bit must be set.
644 val |= (MaxUintFromFormat(vform) >> 1) + 1;
645 }
646 SetInt(vform, i, val);
647 }
648 return *this;
649 }
650
651 // Signed halve lanes of a vector, and use the carry state to set the top bit.
Halve(VectorFormat vform)652 LogicVRegister& Halve(VectorFormat vform) {
653 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
654 int64_t val = Int(vform, i);
655 SetRounding(i, (val & 1) == 1);
656 val >>= 1;
657 if (GetSignedSaturation(i) != kNotSaturated) {
658 // If the operation causes signed saturation, the sign bit must be
659 // inverted.
660 val ^= (MaxUintFromFormat(vform) >> 1) + 1;
661 }
662 SetInt(vform, i, val);
663 }
664 return *this;
665 }
666
667 private:
668 SimVRegister& register_;
669
670 // Allocate one saturation state entry per lane; largest register is type Q,
671 // and lanes can be a minimum of one byte wide.
672 Saturation saturated_[kQRegSizeInBytes];
673
674 // Allocate one rounding state entry per lane.
675 bool round_[kQRegSizeInBytes];
676 };
677
678 // The proper way to initialize a simulated system register (such as NZCV) is as
679 // follows:
680 // SimSystemRegister nzcv = SimSystemRegister::DefaultValueFor(NZCV);
681 class SimSystemRegister {
682 public:
683 // The default constructor represents a register which has no writable bits.
684 // It is not possible to set its value to anything other than 0.
SimSystemRegister()685 SimSystemRegister() : value_(0), write_ignore_mask_(0xffffffff) { }
686
RawValue()687 uint32_t RawValue() const {
688 return value_;
689 }
690
SetRawValue(uint32_t new_value)691 void SetRawValue(uint32_t new_value) {
692 value_ = (value_ & write_ignore_mask_) | (new_value & ~write_ignore_mask_);
693 }
694
Bits(int msb,int lsb)695 uint32_t Bits(int msb, int lsb) const {
696 return unsigned_bitextract_32(msb, lsb, value_);
697 }
698
SignedBits(int msb,int lsb)699 int32_t SignedBits(int msb, int lsb) const {
700 return signed_bitextract_32(msb, lsb, value_);
701 }
702
703 void SetBits(int msb, int lsb, uint32_t bits);
704
705 // Default system register values.
706 static SimSystemRegister DefaultValueFor(SystemRegister id);
707
708 #define DEFINE_GETTER(Name, HighBit, LowBit, Func) \
709 uint32_t Name() const { return Func(HighBit, LowBit); } \
710 void Set##Name(uint32_t bits) { SetBits(HighBit, LowBit, bits); }
711 #define DEFINE_WRITE_IGNORE_MASK(Name, Mask) \
712 static const uint32_t Name##WriteIgnoreMask = ~static_cast<uint32_t>(Mask);
713
SYSTEM_REGISTER_FIELDS_LIST(DEFINE_GETTER,DEFINE_WRITE_IGNORE_MASK)714 SYSTEM_REGISTER_FIELDS_LIST(DEFINE_GETTER, DEFINE_WRITE_IGNORE_MASK)
715
716 #undef DEFINE_ZERO_BITS
717 #undef DEFINE_GETTER
718
719 protected:
720 // Most system registers only implement a few of the bits in the word. Other
721 // bits are "read-as-zero, write-ignored". The write_ignore_mask argument
722 // describes the bits which are not modifiable.
723 SimSystemRegister(uint32_t value, uint32_t write_ignore_mask)
724 : value_(value), write_ignore_mask_(write_ignore_mask) { }
725
726 uint32_t value_;
727 uint32_t write_ignore_mask_;
728 };
729
730
731 class SimExclusiveLocalMonitor {
732 public:
SimExclusiveLocalMonitor()733 SimExclusiveLocalMonitor() : kSkipClearProbability(8), seed_(0x87654321) {
734 Clear();
735 }
736
737 // Clear the exclusive monitor (like clrex).
Clear()738 void Clear() {
739 address_ = 0;
740 size_ = 0;
741 }
742
743 // Clear the exclusive monitor most of the time.
MaybeClear()744 void MaybeClear() {
745 if ((seed_ % kSkipClearProbability) != 0) {
746 Clear();
747 }
748
749 // Advance seed_ using a simple linear congruential generator.
750 seed_ = (seed_ * 48271) % 2147483647;
751 }
752
753 // Mark the address range for exclusive access (like load-exclusive).
MarkExclusive(uint64_t address,size_t size)754 void MarkExclusive(uint64_t address, size_t size) {
755 address_ = address;
756 size_ = size;
757 }
758
759 // Return true if the address range is marked (like store-exclusive).
760 // This helper doesn't implicitly clear the monitor.
IsExclusive(uint64_t address,size_t size)761 bool IsExclusive(uint64_t address, size_t size) {
762 VIXL_ASSERT(size > 0);
763 // Be pedantic: Require both the address and the size to match.
764 return (size == size_) && (address == address_);
765 }
766
767 private:
768 uint64_t address_;
769 size_t size_;
770
771 const int kSkipClearProbability;
772 uint32_t seed_;
773 };
774
775
776 // We can't accurate simulate the global monitor since it depends on external
777 // influences. Instead, this implementation occasionally causes accesses to
778 // fail, according to kPassProbability.
779 class SimExclusiveGlobalMonitor {
780 public:
SimExclusiveGlobalMonitor()781 SimExclusiveGlobalMonitor() : kPassProbability(8), seed_(0x87654321) {}
782
IsExclusive(uint64_t address,size_t size)783 bool IsExclusive(uint64_t address, size_t size) {
784 USE(address);
785 USE(size);
786
787 bool pass = (seed_ % kPassProbability) != 0;
788 // Advance seed_ using a simple linear congruential generator.
789 seed_ = (seed_ * 48271) % 2147483647;
790 return pass;
791 }
792
793 private:
794 const int kPassProbability;
795 uint32_t seed_;
796 };
797
798
799 class Simulator : public DecoderVisitor {
800 public:
801 explicit Simulator(Decoder* decoder, FILE* stream = stdout);
802 ~Simulator();
803
804 void ResetState();
805
806 // Run the simulator.
807 virtual void Run();
808 void RunFrom(const Instruction* first);
809
810 // Simulation helpers.
pc()811 const Instruction* pc() const { return pc_; }
set_pc(const Instruction * new_pc)812 void set_pc(const Instruction* new_pc) {
813 pc_ = Memory::AddressUntag(new_pc);
814 pc_modified_ = true;
815 }
816
increment_pc()817 void increment_pc() {
818 if (!pc_modified_) {
819 pc_ = pc_->NextInstruction();
820 }
821
822 pc_modified_ = false;
823 }
824
ExecuteInstruction()825 void ExecuteInstruction() {
826 // The program counter should always be aligned.
827 VIXL_ASSERT(IsWordAligned(pc_));
828 decoder_->Decode(pc_);
829 increment_pc();
830 }
831
832 // Declare all Visitor functions.
// DECLARE(A) expands to the declaration of a virtual VisitA(const
// Instruction*) member. It is applied through VISITOR_LIST (defined
// elsewhere) and undefined again straight afterwards so the helper macro does
// not leak out of this header.
#define DECLARE(A) virtual void Visit##A(const Instruction* instr);
VISITOR_LIST(DECLARE)
#undef DECLARE
836
837 // Integer register accessors.
838
839 // Basic accessor: Read the register as the specified type.
840 template<typename T>
841 T reg(unsigned code, Reg31Mode r31mode = Reg31IsZeroRegister) const {
842 VIXL_ASSERT(code < kNumberOfRegisters);
843 if ((code == 31) && (r31mode == Reg31IsZeroRegister)) {
844 T result;
845 memset(&result, 0, sizeof(result));
846 return result;
847 }
848 return registers_[code].Get<T>();
849 }
850
851 // Common specialized accessors for the reg() template.
852 int32_t wreg(unsigned code,
853 Reg31Mode r31mode = Reg31IsZeroRegister) const {
854 return reg<int32_t>(code, r31mode);
855 }
856
857 int64_t xreg(unsigned code,
858 Reg31Mode r31mode = Reg31IsZeroRegister) const {
859 return reg<int64_t>(code, r31mode);
860 }
861
862 // As above, with parameterized size and return type. The value is
863 // either zero-extended or truncated to fit, as required.
864 template<typename T>
865 T reg(unsigned size, unsigned code,
866 Reg31Mode r31mode = Reg31IsZeroRegister) const {
867 uint64_t raw;
868 switch (size) {
869 case kWRegSize: raw = reg<uint32_t>(code, r31mode); break;
870 case kXRegSize: raw = reg<uint64_t>(code, r31mode); break;
871 default:
872 VIXL_UNREACHABLE();
873 return 0;
874 }
875
876 T result;
877 VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(raw));
878 // Copy the result and truncate to fit. This assumes a little-endian host.
879 memcpy(&result, &raw, sizeof(result));
880 return result;
881 }
882
883 // Use int64_t by default if T is not specified.
884 int64_t reg(unsigned size, unsigned code,
885 Reg31Mode r31mode = Reg31IsZeroRegister) const {
886 return reg<int64_t>(size, code, r31mode);
887 }
888
// Passed to the register setters to say whether the write should be reported
// through the Log*Register helpers.
enum RegLogMode {
  LogRegWrites,  // Report the write (the setters' default).
  NoRegLog       // Perform the write without reporting it.
};
893
894 // Write 'value' into an integer register. The value is zero-extended. This
895 // behaviour matches AArch64 register writes.
896 template<typename T>
897 void set_reg(unsigned code, T value,
898 RegLogMode log_mode = LogRegWrites,
899 Reg31Mode r31mode = Reg31IsZeroRegister) {
900 VIXL_STATIC_ASSERT((sizeof(T) == kWRegSizeInBytes) ||
901 (sizeof(T) == kXRegSizeInBytes));
902 VIXL_ASSERT(code < kNumberOfRegisters);
903
904 if ((code == 31) && (r31mode == Reg31IsZeroRegister)) {
905 return;
906 }
907
908 registers_[code].Set(value);
909
910 if (log_mode == LogRegWrites) LogRegister(code, r31mode);
911 }
912
913 // Common specialized accessors for the set_reg() template.
914 void set_wreg(unsigned code, int32_t value,
915 RegLogMode log_mode = LogRegWrites,
916 Reg31Mode r31mode = Reg31IsZeroRegister) {
917 set_reg(code, value, log_mode, r31mode);
918 }
919
920 void set_xreg(unsigned code, int64_t value,
921 RegLogMode log_mode = LogRegWrites,
922 Reg31Mode r31mode = Reg31IsZeroRegister) {
923 set_reg(code, value, log_mode, r31mode);
924 }
925
926 // As above, with parameterized size and type. The value is either
927 // zero-extended or truncated to fit, as required.
928 template<typename T>
929 void set_reg(unsigned size, unsigned code, T value,
930 RegLogMode log_mode = LogRegWrites,
931 Reg31Mode r31mode = Reg31IsZeroRegister) {
932 // Zero-extend the input.
933 uint64_t raw = 0;
934 VIXL_STATIC_ASSERT(sizeof(value) <= sizeof(raw));
935 memcpy(&raw, &value, sizeof(value));
936
937 // Write (and possibly truncate) the value.
938 switch (size) {
939 case kWRegSize: set_reg<uint32_t>(code, raw, log_mode, r31mode); break;
940 case kXRegSize: set_reg<uint64_t>(code, raw, log_mode, r31mode); break;
941 default:
942 VIXL_UNREACHABLE();
943 return;
944 }
945 }
946
947 // Common specialized accessors for the set_reg() template.
948
949 // Commonly-used special cases.
950 template<typename T>
set_lr(T value)951 void set_lr(T value) {
952 set_reg(kLinkRegCode, value);
953 }
954
955 template<typename T>
set_sp(T value)956 void set_sp(T value) {
957 set_reg(31, value, LogRegWrites, Reg31IsStackPointer);
958 }
959
960 // Vector register accessors.
961 // These are equivalent to the integer register accessors, but for vector
962 // registers.
963
964 // A structure for representing a 128-bit Q register.
// Plain byte container: `val` holds kQRegSizeInBytes raw bytes, so a whole Q
// register can be passed and returned by value.
struct qreg_t { uint8_t val[kQRegSizeInBytes]; };
966
967 // Basic accessor: read the register as the specified type.
968 template<typename T>
vreg(unsigned code)969 T vreg(unsigned code) const {
970 VIXL_STATIC_ASSERT((sizeof(T) == kBRegSizeInBytes) ||
971 (sizeof(T) == kHRegSizeInBytes) ||
972 (sizeof(T) == kSRegSizeInBytes) ||
973 (sizeof(T) == kDRegSizeInBytes) ||
974 (sizeof(T) == kQRegSizeInBytes));
975 VIXL_ASSERT(code < kNumberOfVRegisters);
976
977 return vregisters_[code].Get<T>();
978 }
979
980 // Common specialized accessors for the vreg() template.
breg(unsigned code)981 int8_t breg(unsigned code) const {
982 return vreg<int8_t>(code);
983 }
984
hreg(unsigned code)985 int16_t hreg(unsigned code) const {
986 return vreg<int16_t>(code);
987 }
988
sreg(unsigned code)989 float sreg(unsigned code) const {
990 return vreg<float>(code);
991 }
992
sreg_bits(unsigned code)993 uint32_t sreg_bits(unsigned code) const {
994 return vreg<uint32_t>(code);
995 }
996
dreg(unsigned code)997 double dreg(unsigned code) const {
998 return vreg<double>(code);
999 }
1000
dreg_bits(unsigned code)1001 uint64_t dreg_bits(unsigned code) const {
1002 return vreg<uint64_t>(code);
1003 }
1004
qreg(unsigned code)1005 qreg_t qreg(unsigned code) const {
1006 return vreg<qreg_t>(code);
1007 }
1008
1009 // As above, with parameterized size and return type. The value is
1010 // either zero-extended or truncated to fit, as required.
1011 template<typename T>
vreg(unsigned size,unsigned code)1012 T vreg(unsigned size, unsigned code) const {
1013 uint64_t raw = 0;
1014 T result;
1015
1016 switch (size) {
1017 case kSRegSize: raw = vreg<uint32_t>(code); break;
1018 case kDRegSize: raw = vreg<uint64_t>(code); break;
1019 default:
1020 VIXL_UNREACHABLE();
1021 break;
1022 }
1023
1024 VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(raw));
1025 // Copy the result and truncate to fit. This assumes a little-endian host.
1026 memcpy(&result, &raw, sizeof(result));
1027 return result;
1028 }
1029
vreg(unsigned code)1030 inline SimVRegister& vreg(unsigned code) {
1031 return vregisters_[code];
1032 }
1033
1034 // Basic accessor: Write the specified value.
1035 template<typename T>
1036 void set_vreg(unsigned code, T value,
1037 RegLogMode log_mode = LogRegWrites) {
1038 VIXL_STATIC_ASSERT((sizeof(value) == kBRegSizeInBytes) ||
1039 (sizeof(value) == kHRegSizeInBytes) ||
1040 (sizeof(value) == kSRegSizeInBytes) ||
1041 (sizeof(value) == kDRegSizeInBytes) ||
1042 (sizeof(value) == kQRegSizeInBytes));
1043 VIXL_ASSERT(code < kNumberOfVRegisters);
1044 vregisters_[code].Set(value);
1045
1046 if (log_mode == LogRegWrites) {
1047 LogVRegister(code, GetPrintRegisterFormat(value));
1048 }
1049 }
1050
1051 // Common specialized accessors for the set_vreg() template.
1052 void set_breg(unsigned code, int8_t value,
1053 RegLogMode log_mode = LogRegWrites) {
1054 set_vreg(code, value, log_mode);
1055 }
1056
1057 void set_hreg(unsigned code, int16_t value,
1058 RegLogMode log_mode = LogRegWrites) {
1059 set_vreg(code, value, log_mode);
1060 }
1061
1062 void set_sreg(unsigned code, float value,
1063 RegLogMode log_mode = LogRegWrites) {
1064 set_vreg(code, value, log_mode);
1065 }
1066
1067 void set_sreg_bits(unsigned code, uint32_t value,
1068 RegLogMode log_mode = LogRegWrites) {
1069 set_vreg(code, value, log_mode);
1070 }
1071
1072 void set_dreg(unsigned code, double value,
1073 RegLogMode log_mode = LogRegWrites) {
1074 set_vreg(code, value, log_mode);
1075 }
1076
1077 void set_dreg_bits(unsigned code, uint64_t value,
1078 RegLogMode log_mode = LogRegWrites) {
1079 set_vreg(code, value, log_mode);
1080 }
1081
1082 void set_qreg(unsigned code, qreg_t value,
1083 RegLogMode log_mode = LogRegWrites) {
1084 set_vreg(code, value, log_mode);
1085 }
1086
N()1087 bool N() const { return nzcv_.N() != 0; }
Z()1088 bool Z() const { return nzcv_.Z() != 0; }
C()1089 bool C() const { return nzcv_.C() != 0; }
V()1090 bool V() const { return nzcv_.V() != 0; }
nzcv()1091 SimSystemRegister& nzcv() { return nzcv_; }
1092
1093 // TODO: Find a way to make the fpcr_ members return the proper types, so
1094 // these accessors are not necessary.
RMode()1095 FPRounding RMode() { return static_cast<FPRounding>(fpcr_.RMode()); }
DN()1096 bool DN() { return fpcr_.DN() != 0; }
fpcr()1097 SimSystemRegister& fpcr() { return fpcr_; }
1098
1099 // Specify relevant register formats for Print(V)Register and related helpers.
// Bit layout, as established by the enumerators below:
//   bits 0-2: lane size field (extracted with kPrintRegLaneSizeMask),
//   bits 3-4: scalar / D vector / Q vector (kPrintRegAsVectorMask),
//   bit 5:    floating-point flag (kPrintRegAsFP).
enum PrintRegisterFormat {
  // The lane size.
  // The field value is log2 of the lane size in bytes: the helper
  // GetPrintRegLaneSizeInBytes() computes the size as 1 << field.
  kPrintRegLaneSizeB = 0 << 0,
  kPrintRegLaneSizeH = 1 << 0,
  kPrintRegLaneSizeS = 2 << 0,
  kPrintRegLaneSizeW = kPrintRegLaneSizeS,
  kPrintRegLaneSizeD = 3 << 0,
  kPrintRegLaneSizeX = kPrintRegLaneSizeD,
  kPrintRegLaneSizeQ = 4 << 0,

  kPrintRegLaneSizeOffset = 0,
  kPrintRegLaneSizeMask = 7 << 0,

  // The lane count.
  // This is encoded indirectly: the format records the overall register
  // shape (scalar, D vector or Q vector), and GetPrintRegLaneCount() derives
  // the count from the register size and the lane size.
  kPrintRegAsScalar = 0,
  kPrintRegAsDVector = 1 << 3,
  kPrintRegAsQVector = 2 << 3,

  kPrintRegAsVectorMask = 3 << 3,

  // Indicate floating-point format lanes. (This flag is only supported for S-
  // and D-sized lanes.)
  kPrintRegAsFP = 1 << 5,

  // Supported combinations.

  // Note that kPrintSReg and kPrintDReg include kPrintRegAsFP, whereas
  // kPrintWReg and kPrintXReg do not.
  kPrintXReg = kPrintRegLaneSizeX | kPrintRegAsScalar,
  kPrintWReg = kPrintRegLaneSizeW | kPrintRegAsScalar,
  kPrintSReg = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP,
  kPrintDReg = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP,

  kPrintReg1B = kPrintRegLaneSizeB | kPrintRegAsScalar,
  kPrintReg8B = kPrintRegLaneSizeB | kPrintRegAsDVector,
  kPrintReg16B = kPrintRegLaneSizeB | kPrintRegAsQVector,
  kPrintReg1H = kPrintRegLaneSizeH | kPrintRegAsScalar,
  kPrintReg4H = kPrintRegLaneSizeH | kPrintRegAsDVector,
  kPrintReg8H = kPrintRegLaneSizeH | kPrintRegAsQVector,
  kPrintReg1S = kPrintRegLaneSizeS | kPrintRegAsScalar,
  kPrintReg2S = kPrintRegLaneSizeS | kPrintRegAsDVector,
  kPrintReg4S = kPrintRegLaneSizeS | kPrintRegAsQVector,
  kPrintReg1SFP = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP,
  kPrintReg2SFP = kPrintRegLaneSizeS | kPrintRegAsDVector | kPrintRegAsFP,
  kPrintReg4SFP = kPrintRegLaneSizeS | kPrintRegAsQVector | kPrintRegAsFP,
  kPrintReg1D = kPrintRegLaneSizeD | kPrintRegAsScalar,
  kPrintReg2D = kPrintRegLaneSizeD | kPrintRegAsQVector,
  kPrintReg1DFP = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP,
  kPrintReg2DFP = kPrintRegLaneSizeD | kPrintRegAsQVector | kPrintRegAsFP,
  kPrintReg1Q = kPrintRegLaneSizeQ | kPrintRegAsScalar
};
1149
GetPrintRegLaneSizeInBytesLog2(PrintRegisterFormat format)1150 unsigned GetPrintRegLaneSizeInBytesLog2(PrintRegisterFormat format) {
1151 return (format & kPrintRegLaneSizeMask) >> kPrintRegLaneSizeOffset;
1152 }
1153
GetPrintRegLaneSizeInBytes(PrintRegisterFormat format)1154 unsigned GetPrintRegLaneSizeInBytes(PrintRegisterFormat format) {
1155 return 1 << GetPrintRegLaneSizeInBytesLog2(format);
1156 }
1157
GetPrintRegSizeInBytesLog2(PrintRegisterFormat format)1158 unsigned GetPrintRegSizeInBytesLog2(PrintRegisterFormat format) {
1159 if (format & kPrintRegAsDVector) return kDRegSizeInBytesLog2;
1160 if (format & kPrintRegAsQVector) return kQRegSizeInBytesLog2;
1161
1162 // Scalar types.
1163 return GetPrintRegLaneSizeInBytesLog2(format);
1164 }
1165
GetPrintRegSizeInBytes(PrintRegisterFormat format)1166 unsigned GetPrintRegSizeInBytes(PrintRegisterFormat format) {
1167 return 1 << GetPrintRegSizeInBytesLog2(format);
1168 }
1169
GetPrintRegLaneCount(PrintRegisterFormat format)1170 unsigned GetPrintRegLaneCount(PrintRegisterFormat format) {
1171 unsigned reg_size_log2 = GetPrintRegSizeInBytesLog2(format);
1172 unsigned lane_size_log2 = GetPrintRegLaneSizeInBytesLog2(format);
1173 VIXL_ASSERT(reg_size_log2 >= lane_size_log2);
1174 return 1 << (reg_size_log2 - lane_size_log2);
1175 }
1176
// Select the print format for a register of `reg_size` made of lanes of
// `lane_size`. Defined out of line. The callers visible in this header pass
// sizes in bytes (sizeof values).
PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned reg_size,
                                                  unsigned lane_size);
1179
GetPrintRegisterFormatForSize(unsigned size)1180 PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned size) {
1181 return GetPrintRegisterFormatForSize(size, size);
1182 }
1183
GetPrintRegisterFormatForSizeFP(unsigned size)1184 PrintRegisterFormat GetPrintRegisterFormatForSizeFP(unsigned size) {
1185 switch (size) {
1186 default: VIXL_UNREACHABLE(); return kPrintDReg;
1187 case kDRegSizeInBytes: return kPrintDReg;
1188 case kSRegSizeInBytes: return kPrintSReg;
1189 }
1190 }
1191
GetPrintRegisterFormatTryFP(PrintRegisterFormat format)1192 PrintRegisterFormat GetPrintRegisterFormatTryFP(PrintRegisterFormat format) {
1193 if ((GetPrintRegLaneSizeInBytes(format) == kSRegSizeInBytes) ||
1194 (GetPrintRegLaneSizeInBytes(format) == kDRegSizeInBytes)) {
1195 return static_cast<PrintRegisterFormat>(format | kPrintRegAsFP);
1196 }
1197 return format;
1198 }
1199
1200 template<typename T>
GetPrintRegisterFormat(T value)1201 PrintRegisterFormat GetPrintRegisterFormat(T value) {
1202 return GetPrintRegisterFormatForSize(sizeof(value));
1203 }
1204
GetPrintRegisterFormat(double value)1205 PrintRegisterFormat GetPrintRegisterFormat(double value) {
1206 VIXL_STATIC_ASSERT(sizeof(value) == kDRegSizeInBytes);
1207 return GetPrintRegisterFormatForSizeFP(sizeof(value));
1208 }
1209
GetPrintRegisterFormat(float value)1210 PrintRegisterFormat GetPrintRegisterFormat(float value) {
1211 VIXL_STATIC_ASSERT(sizeof(value) == kSRegSizeInBytes);
1212 return GetPrintRegisterFormatForSizeFP(sizeof(value));
1213 }
1214
// Overload that selects the print format from a VectorFormat. Defined out of
// line.
PrintRegisterFormat GetPrintRegisterFormat(VectorFormat vform);

// Print all registers of the specified types.
// These print unconditionally; the Log* wrappers in this class are the
// variants that consult trace_parameters() first.
void PrintRegisters();
void PrintVRegisters();
void PrintSystemRegisters();

// As above, but only print the registers that have been updated.
void PrintWrittenRegisters();
void PrintWrittenVRegisters();
1225
// As above, but respect LOG_REGS and LOG_VREGS.
LogWrittenRegisters()1227 void LogWrittenRegisters() {
1228 if (trace_parameters() & LOG_REGS) PrintWrittenRegisters();
1229 }
LogWrittenVRegisters()1230 void LogWrittenVRegisters() {
1231 if (trace_parameters() & LOG_VREGS) PrintWrittenVRegisters();
1232 }
LogAllWrittenRegisters()1233 void LogAllWrittenRegisters() {
1234 LogWrittenRegisters();
1235 LogWrittenVRegisters();
1236 }
1237
1238 // Print individual register values (after update).
// These print unconditionally and are defined out of line. Note that
// PrintRegister() defaults to Reg31IsStackPointer, unlike the reg() and
// set_reg() accessors, which default to Reg31IsZeroRegister.
void PrintRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer);
void PrintVRegister(unsigned code, PrintRegisterFormat format);
void PrintSystemRegister(SystemRegister id);
1242
1243 // Like Print* (above), but respect trace_parameters().
1244 void LogRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer) {
1245 if (trace_parameters() & LOG_REGS) PrintRegister(code, r31mode);
1246 }
LogVRegister(unsigned code,PrintRegisterFormat format)1247 void LogVRegister(unsigned code, PrintRegisterFormat format) {
1248 if (trace_parameters() & LOG_VREGS) PrintVRegister(code, format);
1249 }
LogSystemRegister(SystemRegister id)1250 void LogSystemRegister(SystemRegister id) {
1251 if (trace_parameters() & LOG_SYSREGS) PrintSystemRegister(id);
1252 }
1253
1254 // Print memory accesses.
// Unconditional printers for memory accesses, defined out of line. Each takes
// the accessed address, the code of the register involved and the format to
// print it in; the vector variants additionally take a lane number.
void PrintRead(uintptr_t address, unsigned reg_code,
               PrintRegisterFormat format);
void PrintWrite(uintptr_t address, unsigned reg_code,
                PrintRegisterFormat format);
void PrintVRead(uintptr_t address, unsigned reg_code,
                PrintRegisterFormat format, unsigned lane);
void PrintVWrite(uintptr_t address, unsigned reg_code,
                 PrintRegisterFormat format, unsigned lane);
1263
1264 // Like Print* (above), but respect trace_parameters().
LogRead(uintptr_t address,unsigned reg_code,PrintRegisterFormat format)1265 void LogRead(uintptr_t address, unsigned reg_code,
1266 PrintRegisterFormat format) {
1267 if (trace_parameters() & LOG_REGS) PrintRead(address, reg_code, format);
1268 }
LogWrite(uintptr_t address,unsigned reg_code,PrintRegisterFormat format)1269 void LogWrite(uintptr_t address, unsigned reg_code,
1270 PrintRegisterFormat format) {
1271 if (trace_parameters() & LOG_WRITE) PrintWrite(address, reg_code, format);
1272 }
1273 void LogVRead(uintptr_t address, unsigned reg_code,
1274 PrintRegisterFormat format, unsigned lane = 0) {
1275 if (trace_parameters() & LOG_VREGS) {
1276 PrintVRead(address, reg_code, format, lane);
1277 }
1278 }
1279 void LogVWrite(uintptr_t address, unsigned reg_code,
1280 PrintRegisterFormat format, unsigned lane = 0) {
1281 if (trace_parameters() & LOG_WRITE) {
1282 PrintVWrite(address, reg_code, format, lane);
1283 }
1284 }
1285
1286 // Helper functions for register tracing.
// Low-level printers, defined out of line. By default the raw helpers cover a
// whole X register (kXRegSizeInBytes) or a whole Q register
// (kQRegSizeInBytes, starting at lsb 0), and the FP helper covers a single
// lane (lane 0).
// NOTE(review): `lsb` and `rightmost_lane` presumably select where the
// printed portion starts; confirm against the definitions.
void PrintRegisterRawHelper(unsigned code, Reg31Mode r31mode,
                            int size_in_bytes = kXRegSizeInBytes);
void PrintVRegisterRawHelper(unsigned code, int bytes = kQRegSizeInBytes,
                             int lsb = 0);
void PrintVRegisterFPHelper(unsigned code, unsigned lane_size_in_bytes,
                            int lane_count = 1, int rightmost_lane = 0);
1293
// NOTE(review): presumably the handlers for the debug pseudo instructions
// (unreachable, trace, log) described near the top of this header; confirm
// against the definitions.
void DoUnreachable(const Instruction* instr);
void DoTrace(const Instruction* instr);
void DoLog(const Instruction* instr);
1297
// Map a register code to a name string for the given register view. For the
// integer views, `mode` chooses how code 31 is interpreted and defaults to
// the zero register.
// NOTE(review): the lifetime of the returned strings is not visible here;
// they are presumably static, so confirm before storing the pointers.
static const char* WRegNameForCode(unsigned code,
                                   Reg31Mode mode = Reg31IsZeroRegister);
static const char* XRegNameForCode(unsigned code,
                                   Reg31Mode mode = Reg31IsZeroRegister);
static const char* SRegNameForCode(unsigned code);
static const char* DRegNameForCode(unsigned code);
static const char* VRegNameForCode(unsigned code);
1305
coloured_trace()1306 bool coloured_trace() const { return coloured_trace_; }
// Setter for the coloured-trace setting. Defined out of line.
// NOTE(review): presumably also updates the clr_* strings; confirm against
// the definition.
void set_coloured_trace(bool value);
1308
trace_parameters()1309 int trace_parameters() const { return trace_parameters_; }
// Setter for the trace flags returned by trace_parameters(). Defined out of
// line.
void set_trace_parameters(int parameters);

// NOTE(review): presumably enables or disables instruction statistics
// gathering; confirm against the definition.
void set_instruction_stats(bool value);
1313
1314 // Clear the simulated local monitor to force the next store-exclusive
1315 // instruction to fail.
ClearLocalMonitor()1316 void ClearLocalMonitor() {
1317 local_monitor_.Clear();
1318 }
1319
SilenceExclusiveAccessWarning()1320 void SilenceExclusiveAccessWarning() {
1321 print_exclusive_access_warning_ = false;
1322 }
1323
1324 protected:
// NOTE(review): presumably the colour escape strings used when printing
// traces, one per kind of printed item, assigned when the coloured-trace
// setting changes; confirm against set_coloured_trace().
const char* clr_normal;
const char* clr_flag_name;
const char* clr_flag_value;
const char* clr_reg_name;
const char* clr_reg_value;
const char* clr_vreg_name;
const char* clr_vreg_value;
const char* clr_memory_address;
const char* clr_warning;
const char* clr_warning_message;
const char* clr_printf;
1336
1337 // Simulation helpers ------------------------------------
ConditionPassed(Condition cond)1338 bool ConditionPassed(Condition cond) {
1339 switch (cond) {
1340 case eq:
1341 return Z();
1342 case ne:
1343 return !Z();
1344 case hs:
1345 return C();
1346 case lo:
1347 return !C();
1348 case mi:
1349 return N();
1350 case pl:
1351 return !N();
1352 case vs:
1353 return V();
1354 case vc:
1355 return !V();
1356 case hi:
1357 return C() && !Z();
1358 case ls:
1359 return !(C() && !Z());
1360 case ge:
1361 return N() == V();
1362 case lt:
1363 return N() != V();
1364 case gt:
1365 return !Z() && (N() == V());
1366 case le:
1367 return !(!Z() && (N() == V()));
1368 case nv:
1369 VIXL_FALLTHROUGH();
1370 case al:
1371 return true;
1372 default:
1373 VIXL_UNREACHABLE();
1374 return false;
1375 }
1376 }
1377
ConditionPassed(Instr cond)1378 bool ConditionPassed(Instr cond) {
1379 return ConditionPassed(static_cast<Condition>(cond));
1380 }
1381
ConditionFailed(Condition cond)1382 bool ConditionFailed(Condition cond) {
1383 return !ConditionPassed(cond);
1384 }
1385
// Shared helpers used by the instruction visitors; all are defined out of
// line.
// NOTE(review): the summaries below are inferred from names and signatures
// only; confirm against the definitions.

// Add/subtract and add-with-carry. `set_flags` presumably controls whether
// NZCV is updated; `carry_in` defaults to 0.
void AddSubHelper(const Instruction* instr, int64_t op2);
int64_t AddWithCarry(unsigned reg_size,
                     bool set_flags,
                     int64_t src1,
                     int64_t src2,
                     int64_t carry_in = 0);
// Logical and conditional-compare operations on a precomputed second operand.
void LogicalHelper(const Instruction* instr, int64_t op2);
void ConditionalCompareHelper(const Instruction* instr, int64_t op2);
// Scalar loads and stores, and address computation for an addressing mode.
void LoadStoreHelper(const Instruction* instr,
                     int64_t offset,
                     AddrMode addrmode);
void LoadStorePairHelper(const Instruction* instr, AddrMode addrmode);
uintptr_t AddressModeHelper(unsigned addr_reg,
                            int64_t offset,
                            AddrMode addrmode);
// NEON structure loads and stores.
void NEONLoadStoreMultiStructHelper(const Instruction* instr,
                                    AddrMode addr_mode);
void NEONLoadStoreSingleStructHelper(const Instruction* instr,
                                     AddrMode addr_mode);
1405
AddressUntag(uint64_t address)1406 uint64_t AddressUntag(uint64_t address) {
1407 return address & ~kAddressTagMask;
1408 }
1409
1410 template <typename T>
AddressUntag(T * address)1411 T* AddressUntag(T* address) {
1412 uintptr_t address_raw = reinterpret_cast<uintptr_t>(address);
1413 return reinterpret_cast<T*>(AddressUntag(address_raw));
1414 }
1415
// Operand transformation helpers, defined out of line.
// NOTE(review): presumably these apply the shift, rotate or extend selected
// by the enum argument to `value`, treated as a register of the given width;
// confirm against the definitions. `left_shift` defaults to 0.
int64_t ShiftOperand(unsigned reg_size,
                     int64_t value,
                     Shift shift_type,
                     unsigned amount);
int64_t Rotate(unsigned reg_width,
               int64_t value,
               Shift shift_type,
               unsigned amount);
int64_t ExtendValue(unsigned reg_width,
                    int64_t value,
                    Extend extend_type,
                    unsigned left_shift = 0);
1428
// Mode argument for ReverseBytes().
// NOTE(review): presumably the size, in bits, of the units within which bytes
// are reversed; confirm against the ReverseBytes() definition.
enum ReverseByteMode {
  Reverse16 = 0,
  Reverse32 = 1,
  Reverse64 = 2
};
// Bit-manipulation helpers, defined out of line.
// NOTE(review): PolynomialMult takes two 8-bit operands and returns a 16-bit
// result, presumably a carry-less (polynomial) product; confirm against the
// definition.
uint64_t ReverseBytes(uint64_t value, ReverseByteMode mode);
uint64_t ReverseBits(uint64_t value, unsigned num_bits);
uint16_t PolynomialMult(uint8_t op1, uint8_t op2);
1437
// NEON structure load helpers, defined out of line. ldN takes N destination
// registers and the address to load from.
// NOTE(review): the overloads with an `index` parameter presumably load a
// single lane, and the ldNr forms presumably replicate the loaded element to
// all lanes; confirm against the definitions.
void ld1(VectorFormat vform,
         LogicVRegister dst,
         uint64_t addr);
void ld1(VectorFormat vform,
         LogicVRegister dst,
         int index,
         uint64_t addr);
void ld1r(VectorFormat vform,
          LogicVRegister dst,
          uint64_t addr);
void ld2(VectorFormat vform,
         LogicVRegister dst1,
         LogicVRegister dst2,
         uint64_t addr);
void ld2(VectorFormat vform,
         LogicVRegister dst1,
         LogicVRegister dst2,
         int index,
         uint64_t addr);
void ld2r(VectorFormat vform,
          LogicVRegister dst1,
          LogicVRegister dst2,
          uint64_t addr);
void ld3(VectorFormat vform,
         LogicVRegister dst1,
         LogicVRegister dst2,
         LogicVRegister dst3,
         uint64_t addr);
void ld3(VectorFormat vform,
         LogicVRegister dst1,
         LogicVRegister dst2,
         LogicVRegister dst3,
         int index,
         uint64_t addr);
void ld3r(VectorFormat vform,
          LogicVRegister dst1,
          LogicVRegister dst2,
          LogicVRegister dst3,
          uint64_t addr);
void ld4(VectorFormat vform,
         LogicVRegister dst1,
         LogicVRegister dst2,
         LogicVRegister dst3,
         LogicVRegister dst4,
         uint64_t addr);
void ld4(VectorFormat vform,
         LogicVRegister dst1,
         LogicVRegister dst2,
         LogicVRegister dst3,
         LogicVRegister dst4,
         int index,
         uint64_t addr);
void ld4r(VectorFormat vform,
          LogicVRegister dst1,
          LogicVRegister dst2,
          LogicVRegister dst3,
          LogicVRegister dst4,
          uint64_t addr);
// NEON structure store helpers, defined out of line. stN takes N source
// registers and the address to store to.
// NOTE(review): the overloads with an `index` parameter presumably store a
// single lane; confirm against the definitions.
void st1(VectorFormat vform,
         LogicVRegister src,
         uint64_t addr);
void st1(VectorFormat vform,
         LogicVRegister src,
         int index,
         uint64_t addr);
void st2(VectorFormat vform,
         LogicVRegister src,
         LogicVRegister src2,
         uint64_t addr);
void st2(VectorFormat vform,
         LogicVRegister src,
         LogicVRegister src2,
         int index,
         uint64_t addr);
void st3(VectorFormat vform,
         LogicVRegister src,
         LogicVRegister src2,
         LogicVRegister src3,
         uint64_t addr);
void st3(VectorFormat vform,
         LogicVRegister src,
         LogicVRegister src2,
         LogicVRegister src3,
         int index,
         uint64_t addr);
void st4(VectorFormat vform,
         LogicVRegister src,
         LogicVRegister src2,
         LogicVRegister src3,
         LogicVRegister src4,
         uint64_t addr);
void st4(VectorFormat vform,
         LogicVRegister src,
         LogicVRegister src2,
         LogicVRegister src3,
         LogicVRegister src4,
         int index,
         uint64_t addr);
// Vector compare helpers, defined out of line. cmp() compares src1 against
// either a second register or an immediate under condition `cond`.
// NOTE(review): cmptst presumably implements a bitwise test (CMTST); confirm
// against the definition.
LogicVRegister cmp(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src1,
                   const LogicVRegister& src2,
                   Condition cond);
LogicVRegister cmp(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src1,
                   int imm,
                   Condition cond);
LogicVRegister cmptst(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src1,
                      const LogicVRegister& src2);
// Vector integer arithmetic helpers, defined out of line. Each takes the
// vector format, a destination and two sources, and returns a
// LogicVRegister.
// NOTE(review): the mul/mla/mls overloads with an `index` parameter are
// presumably the by-element forms, where `index` selects a lane of src2;
// confirm against the definitions.
LogicVRegister add(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src1,
                   const LogicVRegister& src2);
LogicVRegister addp(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
LogicVRegister mla(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src1,
                   const LogicVRegister& src2);
LogicVRegister mls(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src1,
                   const LogicVRegister& src2);
LogicVRegister mul(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src1,
                   const LogicVRegister& src2);
LogicVRegister mul(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src1,
                   const LogicVRegister& src2,
                   int index);
LogicVRegister mla(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src1,
                   const LogicVRegister& src2,
                   int index);
LogicVRegister mls(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src1,
                   const LogicVRegister& src2,
                   int index);
LogicVRegister pmul(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
1589
// Pointer-to-member type for Simulator operations that take a vector format,
// a destination, two sources and an element index.
typedef LogicVRegister (Simulator::*ByElementOp)(VectorFormat vform,
                                                 LogicVRegister dst,
                                                 const LogicVRegister& src1,
                                                 const LogicVRegister& src2,
                                                 int index);
// By-element operations, defined out of line. Every declaration in this group
// has the parameter list (vform, dst, src1, src2, index), which is the
// signature of the ByElementOp pointer-to-member type.
// NOTE(review): `index` presumably selects the lane of src2 used as the
// element operand, and the "2" suffixed forms presumably operate on the
// upper half of the source vector; confirm against the definitions.
LogicVRegister fmul(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2,
                    int index);
LogicVRegister fmla(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2,
                    int index);
LogicVRegister fmls(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2,
                    int index);
LogicVRegister fmulx(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2,
                     int index);
LogicVRegister smull(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2,
                     int index);
LogicVRegister smull2(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src1,
                      const LogicVRegister& src2,
                      int index);
LogicVRegister umull(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2,
                     int index);
LogicVRegister umull2(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src1,
                      const LogicVRegister& src2,
                      int index);
LogicVRegister smlal(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2,
                     int index);
LogicVRegister smlal2(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src1,
                      const LogicVRegister& src2,
                      int index);
LogicVRegister umlal(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2,
                     int index);
LogicVRegister umlal2(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src1,
                      const LogicVRegister& src2,
                      int index);
LogicVRegister smlsl(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2,
                     int index);
LogicVRegister smlsl2(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src1,
                      const LogicVRegister& src2,
                      int index);
LogicVRegister umlsl(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2,
                     int index);
LogicVRegister umlsl2(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src1,
                      const LogicVRegister& src2,
                      int index);
LogicVRegister sqdmull(VectorFormat vform,
                       LogicVRegister dst,
                       const LogicVRegister& src1,
                       const LogicVRegister& src2,
                       int index);
LogicVRegister sqdmull2(VectorFormat vform,
                        LogicVRegister dst,
                        const LogicVRegister& src1,
                        const LogicVRegister& src2,
                        int index);
LogicVRegister sqdmlal(VectorFormat vform,
                       LogicVRegister dst,
                       const LogicVRegister& src1,
                       const LogicVRegister& src2,
                       int index);
LogicVRegister sqdmlal2(VectorFormat vform,
                        LogicVRegister dst,
                        const LogicVRegister& src1,
                        const LogicVRegister& src2,
                        int index);
LogicVRegister sqdmlsl(VectorFormat vform,
                       LogicVRegister dst,
                       const LogicVRegister& src1,
                       const LogicVRegister& src2,
                       int index);
LogicVRegister sqdmlsl2(VectorFormat vform,
                        LogicVRegister dst,
                        const LogicVRegister& src1,
                        const LogicVRegister& src2,
                        int index);
LogicVRegister sqdmulh(VectorFormat vform,
                       LogicVRegister dst,
                       const LogicVRegister& src1,
                       const LogicVRegister& src2,
                       int index);
LogicVRegister sqrdmulh(VectorFormat vform,
                        LogicVRegister dst,
                        const LogicVRegister& src1,
                        const LogicVRegister& src2,
                        int index);
// Vector subtract and bitwise helpers, defined out of line. and_ carries a
// trailing underscore because `and` is a reserved alternative token in C++.
// bic has both a register form and an immediate form.
// NOTE(review): bif, bit and bsl are presumably the bitwise insert and select
// operations of the same names; confirm against the definitions.
LogicVRegister sub(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src1,
                   const LogicVRegister& src2);
LogicVRegister and_(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
LogicVRegister orr(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src1,
                   const LogicVRegister& src2);
LogicVRegister orn(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src1,
                   const LogicVRegister& src2);
LogicVRegister eor(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src1,
                   const LogicVRegister& src2);
LogicVRegister bic(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src1,
                   const LogicVRegister& src2);
LogicVRegister bic(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src,
                   uint64_t imm);
LogicVRegister bif(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src1,
                   const LogicVRegister& src2);
LogicVRegister bit(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src1,
                   const LogicVRegister& src2);
LogicVRegister bsl(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src1,
                   const LogicVRegister& src2);
// Single-source per-lane bit operations, defined out of line. not_ carries a
// trailing underscore because `not` is a reserved alternative token in C++.
// NOTE(review): rev16, rev32 and rev64 are presumably thin wrappers around
// rev() with a fixed revSize; confirm against the definitions.
LogicVRegister cls(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src);
LogicVRegister clz(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src);
LogicVRegister cnt(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src);
LogicVRegister not_(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src);
LogicVRegister rbit(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src);
LogicVRegister rev(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src,
                   int revSize);
LogicVRegister rev16(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src);
LogicVRegister rev32(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src);
LogicVRegister rev64(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src);
1783 LogicVRegister addlp(VectorFormat vform,
1784 LogicVRegister dst,
1785 const LogicVRegister& src,
1786 bool is_signed,
1787 bool do_accumulate);
1788 LogicVRegister saddlp(VectorFormat vform,
1789 LogicVRegister dst,
1790 const LogicVRegister& src);
1791 LogicVRegister uaddlp(VectorFormat vform,
1792 LogicVRegister dst,
1793 const LogicVRegister& src);
1794 LogicVRegister sadalp(VectorFormat vform,
1795 LogicVRegister dst,
1796 const LogicVRegister& src);
1797 LogicVRegister uadalp(VectorFormat vform,
1798 LogicVRegister dst,
1799 const LogicVRegister& src);
1800 LogicVRegister ext(VectorFormat vform,
1801 LogicVRegister dst,
1802 const LogicVRegister& src1,
1803 const LogicVRegister& src2,
1804 int index);
1805 LogicVRegister ins_element(VectorFormat vform,
1806 LogicVRegister dst,
1807 int dst_index,
1808 const LogicVRegister& src,
1809 int src_index);
1810 LogicVRegister ins_immediate(VectorFormat vform,
1811 LogicVRegister dst,
1812 int dst_index,
1813 uint64_t imm);
1814 LogicVRegister dup_element(VectorFormat vform,
1815 LogicVRegister dst,
1816 const LogicVRegister& src,
1817 int src_index);
1818 LogicVRegister dup_immediate(VectorFormat vform,
1819 LogicVRegister dst,
1820 uint64_t imm);
1821 LogicVRegister movi(VectorFormat vform,
1822 LogicVRegister dst,
1823 uint64_t imm);
1824 LogicVRegister mvni(VectorFormat vform,
1825 LogicVRegister dst,
1826 uint64_t imm);
1827 LogicVRegister orr(VectorFormat vform,
1828 LogicVRegister dst,
1829 const LogicVRegister& src,
1830 uint64_t imm);
1831 LogicVRegister sshl(VectorFormat vform,
1832 LogicVRegister dst,
1833 const LogicVRegister& src1,
1834 const LogicVRegister& src2);
1835 LogicVRegister ushl(VectorFormat vform,
1836 LogicVRegister dst,
1837 const LogicVRegister& src1,
1838 const LogicVRegister& src2);
1839 LogicVRegister sminmax(VectorFormat vform,
1840 LogicVRegister dst,
1841 const LogicVRegister& src1,
1842 const LogicVRegister& src2,
1843 bool max);
1844 LogicVRegister smax(VectorFormat vform,
1845 LogicVRegister dst,
1846 const LogicVRegister& src1,
1847 const LogicVRegister& src2);
1848 LogicVRegister smin(VectorFormat vform,
1849 LogicVRegister dst,
1850 const LogicVRegister& src1,
1851 const LogicVRegister& src2);
1852 LogicVRegister sminmaxp(VectorFormat vform,
1853 LogicVRegister dst,
1854 int dst_index,
1855 const LogicVRegister& src,
1856 bool max);
1857 LogicVRegister smaxp(VectorFormat vform,
1858 LogicVRegister dst,
1859 const LogicVRegister& src1,
1860 const LogicVRegister& src2);
1861 LogicVRegister sminp(VectorFormat vform,
1862 LogicVRegister dst,
1863 const LogicVRegister& src1,
1864 const LogicVRegister& src2);
1865 LogicVRegister addp(VectorFormat vform,
1866 LogicVRegister dst,
1867 const LogicVRegister& src);
1868 LogicVRegister addv(VectorFormat vform,
1869 LogicVRegister dst,
1870 const LogicVRegister& src);
1871 LogicVRegister uaddlv(VectorFormat vform,
1872 LogicVRegister dst,
1873 const LogicVRegister& src);
1874 LogicVRegister saddlv(VectorFormat vform,
1875 LogicVRegister dst,
1876 const LogicVRegister& src);
1877 LogicVRegister sminmaxv(VectorFormat vform,
1878 LogicVRegister dst,
1879 const LogicVRegister& src,
1880 bool max);
1881 LogicVRegister smaxv(VectorFormat vform,
1882 LogicVRegister dst,
1883 const LogicVRegister& src);
1884 LogicVRegister sminv(VectorFormat vform,
1885 LogicVRegister dst,
1886 const LogicVRegister& src);
1887 LogicVRegister uxtl(VectorFormat vform,
1888 LogicVRegister dst,
1889 const LogicVRegister& src);
1890 LogicVRegister uxtl2(VectorFormat vform,
1891 LogicVRegister dst,
1892 const LogicVRegister& src);
1893 LogicVRegister sxtl(VectorFormat vform,
1894 LogicVRegister dst,
1895 const LogicVRegister& src);
1896 LogicVRegister sxtl2(VectorFormat vform,
1897 LogicVRegister dst,
1898 const LogicVRegister& src);
1899 LogicVRegister tbl(VectorFormat vform,
1900 LogicVRegister dst,
1901 const LogicVRegister& tab,
1902 const LogicVRegister& ind);
1903 LogicVRegister tbl(VectorFormat vform,
1904 LogicVRegister dst,
1905 const LogicVRegister& tab,
1906 const LogicVRegister& tab2,
1907 const LogicVRegister& ind);
1908 LogicVRegister tbl(VectorFormat vform,
1909 LogicVRegister dst,
1910 const LogicVRegister& tab,
1911 const LogicVRegister& tab2,
1912 const LogicVRegister& tab3,
1913 const LogicVRegister& ind);
1914 LogicVRegister tbl(VectorFormat vform,
1915 LogicVRegister dst,
1916 const LogicVRegister& tab,
1917 const LogicVRegister& tab2,
1918 const LogicVRegister& tab3,
1919 const LogicVRegister& tab4,
1920 const LogicVRegister& ind);
1921 LogicVRegister tbx(VectorFormat vform,
1922 LogicVRegister dst,
1923 const LogicVRegister& tab,
1924 const LogicVRegister& ind);
1925 LogicVRegister tbx(VectorFormat vform,
1926 LogicVRegister dst,
1927 const LogicVRegister& tab,
1928 const LogicVRegister& tab2,
1929 const LogicVRegister& ind);
1930 LogicVRegister tbx(VectorFormat vform,
1931 LogicVRegister dst,
1932 const LogicVRegister& tab,
1933 const LogicVRegister& tab2,
1934 const LogicVRegister& tab3,
1935 const LogicVRegister& ind);
1936 LogicVRegister tbx(VectorFormat vform,
1937 LogicVRegister dst,
1938 const LogicVRegister& tab,
1939 const LogicVRegister& tab2,
1940 const LogicVRegister& tab3,
1941 const LogicVRegister& tab4,
1942 const LogicVRegister& ind);
1943 LogicVRegister uaddl(VectorFormat vform,
1944 LogicVRegister dst,
1945 const LogicVRegister& src1,
1946 const LogicVRegister& src2);
1947 LogicVRegister uaddl2(VectorFormat vform,
1948 LogicVRegister dst,
1949 const LogicVRegister& src1,
1950 const LogicVRegister& src2);
1951 LogicVRegister uaddw(VectorFormat vform,
1952 LogicVRegister dst,
1953 const LogicVRegister& src1,
1954 const LogicVRegister& src2);
1955 LogicVRegister uaddw2(VectorFormat vform,
1956 LogicVRegister dst,
1957 const LogicVRegister& src1,
1958 const LogicVRegister& src2);
1959 LogicVRegister saddl(VectorFormat vform,
1960 LogicVRegister dst,
1961 const LogicVRegister& src1,
1962 const LogicVRegister& src2);
1963 LogicVRegister saddl2(VectorFormat vform,
1964 LogicVRegister dst,
1965 const LogicVRegister& src1,
1966 const LogicVRegister& src2);
1967 LogicVRegister saddw(VectorFormat vform,
1968 LogicVRegister dst,
1969 const LogicVRegister& src1,
1970 const LogicVRegister& src2);
1971 LogicVRegister saddw2(VectorFormat vform,
1972 LogicVRegister dst,
1973 const LogicVRegister& src1,
1974 const LogicVRegister& src2);
1975 LogicVRegister usubl(VectorFormat vform,
1976 LogicVRegister dst,
1977 const LogicVRegister& src1,
1978 const LogicVRegister& src2);
1979 LogicVRegister usubl2(VectorFormat vform,
1980 LogicVRegister dst,
1981 const LogicVRegister& src1,
1982 const LogicVRegister& src2);
1983 LogicVRegister usubw(VectorFormat vform,
1984 LogicVRegister dst,
1985 const LogicVRegister& src1,
1986 const LogicVRegister& src2);
1987 LogicVRegister usubw2(VectorFormat vform,
1988 LogicVRegister dst,
1989 const LogicVRegister& src1,
1990 const LogicVRegister& src2);
1991 LogicVRegister ssubl(VectorFormat vform,
1992 LogicVRegister dst,
1993 const LogicVRegister& src1,
1994 const LogicVRegister& src2);
1995 LogicVRegister ssubl2(VectorFormat vform,
1996 LogicVRegister dst,
1997 const LogicVRegister& src1,
1998 const LogicVRegister& src2);
1999 LogicVRegister ssubw(VectorFormat vform,
2000 LogicVRegister dst,
2001 const LogicVRegister& src1,
2002 const LogicVRegister& src2);
2003 LogicVRegister ssubw2(VectorFormat vform,
2004 LogicVRegister dst,
2005 const LogicVRegister& src1,
2006 const LogicVRegister& src2);
2007 LogicVRegister uminmax(VectorFormat vform,
2008 LogicVRegister dst,
2009 const LogicVRegister& src1,
2010 const LogicVRegister& src2,
2011 bool max);
2012 LogicVRegister umax(VectorFormat vform,
2013 LogicVRegister dst,
2014 const LogicVRegister& src1,
2015 const LogicVRegister& src2);
2016 LogicVRegister umin(VectorFormat vform,
2017 LogicVRegister dst,
2018 const LogicVRegister& src1,
2019 const LogicVRegister& src2);
2020 LogicVRegister uminmaxp(VectorFormat vform,
2021 LogicVRegister dst,
2022 int dst_index,
2023 const LogicVRegister& src,
2024 bool max);
2025 LogicVRegister umaxp(VectorFormat vform,
2026 LogicVRegister dst,
2027 const LogicVRegister& src1,
2028 const LogicVRegister& src2);
2029 LogicVRegister uminp(VectorFormat vform,
2030 LogicVRegister dst,
2031 const LogicVRegister& src1,
2032 const LogicVRegister& src2);
2033 LogicVRegister uminmaxv(VectorFormat vform,
2034 LogicVRegister dst,
2035 const LogicVRegister& src,
2036 bool max);
2037 LogicVRegister umaxv(VectorFormat vform,
2038 LogicVRegister dst,
2039 const LogicVRegister& src);
2040 LogicVRegister uminv(VectorFormat vform,
2041 LogicVRegister dst,
2042 const LogicVRegister& src);
2043 LogicVRegister trn1(VectorFormat vform,
2044 LogicVRegister dst,
2045 const LogicVRegister& src1,
2046 const LogicVRegister& src2);
2047 LogicVRegister trn2(VectorFormat vform,
2048 LogicVRegister dst,
2049 const LogicVRegister& src1,
2050 const LogicVRegister& src2);
2051 LogicVRegister zip1(VectorFormat vform,
2052 LogicVRegister dst,
2053 const LogicVRegister& src1,
2054 const LogicVRegister& src2);
2055 LogicVRegister zip2(VectorFormat vform,
2056 LogicVRegister dst,
2057 const LogicVRegister& src1,
2058 const LogicVRegister& src2);
2059 LogicVRegister uzp1(VectorFormat vform,
2060 LogicVRegister dst,
2061 const LogicVRegister& src1,
2062 const LogicVRegister& src2);
2063 LogicVRegister uzp2(VectorFormat vform,
2064 LogicVRegister dst,
2065 const LogicVRegister& src1,
2066 const LogicVRegister& src2);
2067 LogicVRegister shl(VectorFormat vform,
2068 LogicVRegister dst,
2069 const LogicVRegister& src,
2070 int shift);
2071 LogicVRegister scvtf(VectorFormat vform,
2072 LogicVRegister dst,
2073 const LogicVRegister& src,
2074 int fbits,
2075 FPRounding rounding_mode);
2076 LogicVRegister ucvtf(VectorFormat vform,
2077 LogicVRegister dst,
2078 const LogicVRegister& src,
2079 int fbits,
2080 FPRounding rounding_mode);
2081 LogicVRegister sshll(VectorFormat vform,
2082 LogicVRegister dst,
2083 const LogicVRegister& src,
2084 int shift);
2085 LogicVRegister sshll2(VectorFormat vform,
2086 LogicVRegister dst,
2087 const LogicVRegister& src,
2088 int shift);
2089 LogicVRegister shll(VectorFormat vform,
2090 LogicVRegister dst,
2091 const LogicVRegister& src);
2092 LogicVRegister shll2(VectorFormat vform,
2093 LogicVRegister dst,
2094 const LogicVRegister& src);
2095 LogicVRegister ushll(VectorFormat vform,
2096 LogicVRegister dst,
2097 const LogicVRegister& src,
2098 int shift);
2099 LogicVRegister ushll2(VectorFormat vform,
2100 LogicVRegister dst,
2101 const LogicVRegister& src,
2102 int shift);
2103 LogicVRegister sli(VectorFormat vform,
2104 LogicVRegister dst,
2105 const LogicVRegister& src,
2106 int shift);
2107 LogicVRegister sri(VectorFormat vform,
2108 LogicVRegister dst,
2109 const LogicVRegister& src,
2110 int shift);
2111 LogicVRegister sshr(VectorFormat vform,
2112 LogicVRegister dst,
2113 const LogicVRegister& src,
2114 int shift);
2115 LogicVRegister ushr(VectorFormat vform,
2116 LogicVRegister dst,
2117 const LogicVRegister& src,
2118 int shift);
2119 LogicVRegister ssra(VectorFormat vform,
2120 LogicVRegister dst,
2121 const LogicVRegister& src,
2122 int shift);
2123 LogicVRegister usra(VectorFormat vform,
2124 LogicVRegister dst,
2125 const LogicVRegister& src,
2126 int shift);
2127 LogicVRegister srsra(VectorFormat vform,
2128 LogicVRegister dst,
2129 const LogicVRegister& src,
2130 int shift);
2131 LogicVRegister ursra(VectorFormat vform,
2132 LogicVRegister dst,
2133 const LogicVRegister& src,
2134 int shift);
2135 LogicVRegister suqadd(VectorFormat vform,
2136 LogicVRegister dst,
2137 const LogicVRegister& src);
2138 LogicVRegister usqadd(VectorFormat vform,
2139 LogicVRegister dst,
2140 const LogicVRegister& src);
2141 LogicVRegister sqshl(VectorFormat vform,
2142 LogicVRegister dst,
2143 const LogicVRegister& src,
2144 int shift);
2145 LogicVRegister uqshl(VectorFormat vform,
2146 LogicVRegister dst,
2147 const LogicVRegister& src,
2148 int shift);
2149 LogicVRegister sqshlu(VectorFormat vform,
2150 LogicVRegister dst,
2151 const LogicVRegister& src,
2152 int shift);
2153 LogicVRegister abs(VectorFormat vform,
2154 LogicVRegister dst,
2155 const LogicVRegister& src);
2156 LogicVRegister neg(VectorFormat vform,
2157 LogicVRegister dst,
2158 const LogicVRegister& src);
2159 LogicVRegister extractnarrow(VectorFormat vform,
2160 LogicVRegister dst,
2161 bool dstIsSigned,
2162 const LogicVRegister& src,
2163 bool srcIsSigned);
2164 LogicVRegister xtn(VectorFormat vform,
2165 LogicVRegister dst,
2166 const LogicVRegister& src);
2167 LogicVRegister sqxtn(VectorFormat vform,
2168 LogicVRegister dst,
2169 const LogicVRegister& src);
2170 LogicVRegister uqxtn(VectorFormat vform,
2171 LogicVRegister dst,
2172 const LogicVRegister& src);
2173 LogicVRegister sqxtun(VectorFormat vform,
2174 LogicVRegister dst,
2175 const LogicVRegister& src);
2176 LogicVRegister absdiff(VectorFormat vform,
2177 LogicVRegister dst,
2178 const LogicVRegister& src1,
2179 const LogicVRegister& src2,
2180 bool issigned);
2181 LogicVRegister saba(VectorFormat vform,
2182 LogicVRegister dst,
2183 const LogicVRegister& src1,
2184 const LogicVRegister& src2);
2185 LogicVRegister uaba(VectorFormat vform,
2186 LogicVRegister dst,
2187 const LogicVRegister& src1,
2188 const LogicVRegister& src2);
2189 LogicVRegister shrn(VectorFormat vform,
2190 LogicVRegister dst,
2191 const LogicVRegister& src,
2192 int shift);
2193 LogicVRegister shrn2(VectorFormat vform,
2194 LogicVRegister dst,
2195 const LogicVRegister& src,
2196 int shift);
2197 LogicVRegister rshrn(VectorFormat vform,
2198 LogicVRegister dst,
2199 const LogicVRegister& src,
2200 int shift);
2201 LogicVRegister rshrn2(VectorFormat vform,
2202 LogicVRegister dst,
2203 const LogicVRegister& src,
2204 int shift);
2205 LogicVRegister uqshrn(VectorFormat vform,
2206 LogicVRegister dst,
2207 const LogicVRegister& src,
2208 int shift);
2209 LogicVRegister uqshrn2(VectorFormat vform,
2210 LogicVRegister dst,
2211 const LogicVRegister& src,
2212 int shift);
2213 LogicVRegister uqrshrn(VectorFormat vform,
2214 LogicVRegister dst,
2215 const LogicVRegister& src,
2216 int shift);
2217 LogicVRegister uqrshrn2(VectorFormat vform,
2218 LogicVRegister dst,
2219 const LogicVRegister& src,
2220 int shift);
2221 LogicVRegister sqshrn(VectorFormat vform,
2222 LogicVRegister dst,
2223 const LogicVRegister& src,
2224 int shift);
2225 LogicVRegister sqshrn2(VectorFormat vform,
2226 LogicVRegister dst,
2227 const LogicVRegister& src,
2228 int shift);
2229 LogicVRegister sqrshrn(VectorFormat vform,
2230 LogicVRegister dst,
2231 const LogicVRegister& src,
2232 int shift);
2233 LogicVRegister sqrshrn2(VectorFormat vform,
2234 LogicVRegister dst,
2235 const LogicVRegister& src,
2236 int shift);
2237 LogicVRegister sqshrun(VectorFormat vform,
2238 LogicVRegister dst,
2239 const LogicVRegister& src,
2240 int shift);
2241 LogicVRegister sqshrun2(VectorFormat vform,
2242 LogicVRegister dst,
2243 const LogicVRegister& src,
2244 int shift);
2245 LogicVRegister sqrshrun(VectorFormat vform,
2246 LogicVRegister dst,
2247 const LogicVRegister& src,
2248 int shift);
2249 LogicVRegister sqrshrun2(VectorFormat vform,
2250 LogicVRegister dst,
2251 const LogicVRegister& src,
2252 int shift);
2253 LogicVRegister sqrdmulh(VectorFormat vform,
2254 LogicVRegister dst,
2255 const LogicVRegister& src1,
2256 const LogicVRegister& src2,
2257 bool round = true);
2258 LogicVRegister sqdmulh(VectorFormat vform,
2259 LogicVRegister dst,
2260 const LogicVRegister& src1,
2261 const LogicVRegister& src2);
2262 #define NEON_3VREG_LOGIC_LIST(V) \
2263 V(addhn) \
2264 V(addhn2) \
2265 V(raddhn) \
2266 V(raddhn2) \
2267 V(subhn) \
2268 V(subhn2) \
2269 V(rsubhn) \
2270 V(rsubhn2) \
2271 V(pmull) \
2272 V(pmull2) \
2273 V(sabal) \
2274 V(sabal2) \
2275 V(uabal) \
2276 V(uabal2) \
2277 V(sabdl) \
2278 V(sabdl2) \
2279 V(uabdl) \
2280 V(uabdl2) \
2281 V(smull) \
2282 V(smull2) \
2283 V(umull) \
2284 V(umull2) \
2285 V(smlal) \
2286 V(smlal2) \
2287 V(umlal) \
2288 V(umlal2) \
2289 V(smlsl) \
2290 V(smlsl2) \
2291 V(umlsl) \
2292 V(umlsl2) \
2293 V(sqdmlal) \
2294 V(sqdmlal2) \
2295 V(sqdmlsl) \
2296 V(sqdmlsl2) \
2297 V(sqdmull) \
2298 V(sqdmull2)
2299
2300 #define DEFINE_LOGIC_FUNC(FXN) \
2301 LogicVRegister FXN(VectorFormat vform, \
2302 LogicVRegister dst, \
2303 const LogicVRegister& src1, \
2304 const LogicVRegister& src2);
2305 NEON_3VREG_LOGIC_LIST(DEFINE_LOGIC_FUNC)
2306 #undef DEFINE_LOGIC_FUNC
2307
2308 #define NEON_FP3SAME_LIST(V) \
2309 V(fadd, FPAdd, false) \
2310 V(fsub, FPSub, true) \
2311 V(fmul, FPMul, true) \
2312 V(fmulx, FPMulx, true) \
2313 V(fdiv, FPDiv, true) \
2314 V(fmax, FPMax, false) \
2315 V(fmin, FPMin, false) \
2316 V(fmaxnm, FPMaxNM, false) \
2317 V(fminnm, FPMinNM, false)
2318
2319 #define DECLARE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \
2320 template <typename T> \
2321 LogicVRegister FN(VectorFormat vform, \
2322 LogicVRegister dst, \
2323 const LogicVRegister& src1, \
2324 const LogicVRegister& src2); \
2325 LogicVRegister FN(VectorFormat vform, \
2326 LogicVRegister dst, \
2327 const LogicVRegister& src1, \
2328 const LogicVRegister& src2);
2329 NEON_FP3SAME_LIST(DECLARE_NEON_FP_VECTOR_OP)
2330 #undef DECLARE_NEON_FP_VECTOR_OP
2331
2332 #define NEON_FPPAIRWISE_LIST(V) \
2333 V(faddp, fadd, FPAdd) \
2334 V(fmaxp, fmax, FPMax) \
2335 V(fmaxnmp, fmaxnm, FPMaxNM) \
2336 V(fminp, fmin, FPMin) \
2337 V(fminnmp, fminnm, FPMinNM)
2338
2339 #define DECLARE_NEON_FP_PAIR_OP(FNP, FN, OP) \
2340 LogicVRegister FNP(VectorFormat vform, \
2341 LogicVRegister dst, \
2342 const LogicVRegister& src1, \
2343 const LogicVRegister& src2); \
2344 LogicVRegister FNP(VectorFormat vform, \
2345 LogicVRegister dst, \
2346 const LogicVRegister& src);
2347 NEON_FPPAIRWISE_LIST(DECLARE_NEON_FP_PAIR_OP)
2348 #undef DECLARE_NEON_FP_PAIR_OP
2349
2350 template <typename T>
2351 LogicVRegister frecps(VectorFormat vform,
2352 LogicVRegister dst,
2353 const LogicVRegister& src1,
2354 const LogicVRegister& src2);
2355 LogicVRegister frecps(VectorFormat vform,
2356 LogicVRegister dst,
2357 const LogicVRegister& src1,
2358 const LogicVRegister& src2);
2359 template <typename T>
2360 LogicVRegister frsqrts(VectorFormat vform,
2361 LogicVRegister dst,
2362 const LogicVRegister& src1,
2363 const LogicVRegister& src2);
2364 LogicVRegister frsqrts(VectorFormat vform,
2365 LogicVRegister dst,
2366 const LogicVRegister& src1,
2367 const LogicVRegister& src2);
2368 template <typename T>
2369 LogicVRegister fmla(VectorFormat vform,
2370 LogicVRegister dst,
2371 const LogicVRegister& src1,
2372 const LogicVRegister& src2);
2373 LogicVRegister fmla(VectorFormat vform,
2374 LogicVRegister dst,
2375 const LogicVRegister& src1,
2376 const LogicVRegister& src2);
2377 template <typename T>
2378 LogicVRegister fmls(VectorFormat vform,
2379 LogicVRegister dst,
2380 const LogicVRegister& src1,
2381 const LogicVRegister& src2);
2382 LogicVRegister fmls(VectorFormat vform,
2383 LogicVRegister dst,
2384 const LogicVRegister& src1,
2385 const LogicVRegister& src2);
2386 LogicVRegister fnmul(VectorFormat vform,
2387 LogicVRegister dst,
2388 const LogicVRegister& src1,
2389 const LogicVRegister& src2);
2390
2391 template <typename T>
2392 LogicVRegister fcmp(VectorFormat vform,
2393 LogicVRegister dst,
2394 const LogicVRegister& src1,
2395 const LogicVRegister& src2,
2396 Condition cond);
2397 LogicVRegister fcmp(VectorFormat vform,
2398 LogicVRegister dst,
2399 const LogicVRegister& src1,
2400 const LogicVRegister& src2,
2401 Condition cond);
2402 LogicVRegister fabscmp(VectorFormat vform,
2403 LogicVRegister dst,
2404 const LogicVRegister& src1,
2405 const LogicVRegister& src2,
2406 Condition cond);
2407 LogicVRegister fcmp_zero(VectorFormat vform,
2408 LogicVRegister dst,
2409 const LogicVRegister& src,
2410 Condition cond);
2411
2412 template <typename T>
2413 LogicVRegister fneg(VectorFormat vform,
2414 LogicVRegister dst,
2415 const LogicVRegister& src);
2416 LogicVRegister fneg(VectorFormat vform,
2417 LogicVRegister dst,
2418 const LogicVRegister& src);
2419 template <typename T>
2420 LogicVRegister frecpx(VectorFormat vform,
2421 LogicVRegister dst,
2422 const LogicVRegister& src);
2423 LogicVRegister frecpx(VectorFormat vform,
2424 LogicVRegister dst,
2425 const LogicVRegister& src);
2426 template <typename T>
2427 LogicVRegister fabs_(VectorFormat vform,
2428 LogicVRegister dst,
2429 const LogicVRegister& src);
2430 LogicVRegister fabs_(VectorFormat vform,
2431 LogicVRegister dst,
2432 const LogicVRegister& src);
2433 LogicVRegister fabd(VectorFormat vform,
2434 LogicVRegister dst,
2435 const LogicVRegister& src1,
2436 const LogicVRegister& src2);
2437 LogicVRegister frint(VectorFormat vform,
2438 LogicVRegister dst,
2439 const LogicVRegister& src,
2440 FPRounding rounding_mode,
2441 bool inexact_exception = false);
2442 LogicVRegister fcvts(VectorFormat vform,
2443 LogicVRegister dst,
2444 const LogicVRegister& src,
2445 FPRounding rounding_mode,
2446 int fbits = 0);
2447 LogicVRegister fcvtu(VectorFormat vform,
2448 LogicVRegister dst,
2449 const LogicVRegister& src,
2450 FPRounding rounding_mode,
2451 int fbits = 0);
2452 LogicVRegister fcvtl(VectorFormat vform,
2453 LogicVRegister dst,
2454 const LogicVRegister& src);
2455 LogicVRegister fcvtl2(VectorFormat vform,
2456 LogicVRegister dst,
2457 const LogicVRegister& src);
2458 LogicVRegister fcvtn(VectorFormat vform,
2459 LogicVRegister dst,
2460 const LogicVRegister& src);
2461 LogicVRegister fcvtn2(VectorFormat vform,
2462 LogicVRegister dst,
2463 const LogicVRegister& src);
2464 LogicVRegister fcvtxn(VectorFormat vform,
2465 LogicVRegister dst,
2466 const LogicVRegister& src);
2467 LogicVRegister fcvtxn2(VectorFormat vform,
2468 LogicVRegister dst,
2469 const LogicVRegister& src);
2470 LogicVRegister fsqrt(VectorFormat vform,
2471 LogicVRegister dst,
2472 const LogicVRegister& src);
2473 LogicVRegister frsqrte(VectorFormat vform,
2474 LogicVRegister dst,
2475 const LogicVRegister& src);
2476 LogicVRegister frecpe(VectorFormat vform,
2477 LogicVRegister dst,
2478 const LogicVRegister& src,
2479 FPRounding rounding);
2480 LogicVRegister ursqrte(VectorFormat vform,
2481 LogicVRegister dst,
2482 const LogicVRegister& src);
2483 LogicVRegister urecpe(VectorFormat vform,
2484 LogicVRegister dst,
2485 const LogicVRegister& src);
2486
2487 typedef float (Simulator::*FPMinMaxOp)(float a, float b);
2488
2489 LogicVRegister fminmaxv(VectorFormat vform,
2490 LogicVRegister dst,
2491 const LogicVRegister& src,
2492 FPMinMaxOp Op);
2493
2494 LogicVRegister fminv(VectorFormat vform,
2495 LogicVRegister dst,
2496 const LogicVRegister& src);
2497 LogicVRegister fmaxv(VectorFormat vform,
2498 LogicVRegister dst,
2499 const LogicVRegister& src);
2500 LogicVRegister fminnmv(VectorFormat vform,
2501 LogicVRegister dst,
2502 const LogicVRegister& src);
2503 LogicVRegister fmaxnmv(VectorFormat vform,
2504 LogicVRegister dst,
2505 const LogicVRegister& src);
2506
// Generator polynomials passed as the `poly` argument of the CRC helpers:
// the CRC-32 polynomial and the CRC-32C (Castagnoli) polynomial.
static const uint32_t CRC32_POLY = 0x04C11DB7;
static const uint32_t CRC32C_POLY = 0x1EDC6F41;
uint32_t Poly32Mod2(unsigned n, uint64_t data, uint32_t poly);
// Crc32Checksum is declared as a template on the value type T and as a
// non-template overload for uint64_t values.
template <typename T>
uint32_t Crc32Checksum(uint32_t acc, T val, uint32_t poly);
uint32_t Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly);

void SysOp_W(int op, int64_t val);

2516 template <typename T>
2517 T FPRecipSqrtEstimate(T op);
2518 template <typename T>
2519 T FPRecipEstimate(T op, FPRounding rounding);
2520 template <typename T, typename R>
2521 R FPToFixed(T op, int fbits, bool is_signed, FPRounding rounding);
2522
2523 void FPCompare(double val0, double val1, FPTrapFlags trap);
2524 double FPRoundInt(double value, FPRounding round_mode);
2525 double FPToDouble(float value);
2526 float FPToFloat(double value, FPRounding round_mode);
2527 float FPToFloat(float16 value);
2528 float16 FPToFloat16(float value, FPRounding round_mode);
2529 float16 FPToFloat16(double value, FPRounding round_mode);
2530 double recip_sqrt_estimate(double a);
2531 double recip_estimate(double a);
2532 double FPRecipSqrtEstimate(double a);
2533 double FPRecipEstimate(double a);
2534 double FixedToDouble(int64_t src, int fbits, FPRounding round_mode);
2535 double UFixedToDouble(uint64_t src, int fbits, FPRounding round_mode);
2536 float FixedToFloat(int64_t src, int fbits, FPRounding round_mode);
2537 float UFixedToFloat(uint64_t src, int fbits, FPRounding round_mode);
2538 int32_t FPToInt32(double value, FPRounding rmode);
2539 int64_t FPToInt64(double value, FPRounding rmode);
2540 uint32_t FPToUInt32(double value, FPRounding rmode);
2541 uint64_t FPToUInt64(double value, FPRounding rmode);
2542
// Scalar floating-point primitives, templated on the operand type T. These
// are the operations named in the NEON_FP3SAME_LIST / NEON_FPPAIRWISE_LIST
// tables above, plus fused and square-root helpers.
template <typename T>
T FPAdd(T op1, T op2);

template <typename T>
T FPDiv(T op1, T op2);

template <typename T>
T FPMax(T a, T b);

template <typename T>
T FPMaxNM(T a, T b);

template <typename T>
T FPMin(T a, T b);

template <typename T>
T FPMinNM(T a, T b);

template <typename T>
T FPMul(T op1, T op2);

template <typename T>
T FPMulx(T op1, T op2);

// Three-operand form: accumulator `a` plus the two multiplicands.
template <typename T>
T FPMulAdd(T a, T op1, T op2);

template <typename T>
T FPSqrt(T op);

template <typename T>
T FPSub(T op1, T op2);

template <typename T>
T FPRecipStepFused(T op1, T op2);

template <typename T>
T FPRSqrtStepFused(T op1, T op2);

// Hook invoked when a floating-point exception condition is detected (for
// example from FPProcessNaN when it sees a signalling NaN).
//
// This doesn't do anything at the moment. We'll need it if we want support
// for cumulative exception bits or floating-point exceptions.
void FPProcessException() {}

2586 bool FPProcessNaNs(const Instruction* instr);
2587
2588 // Pseudo Printf instruction
2589 void DoPrintf(const Instruction* instr);
2590
2591 // Processor state ---------------------------------------
2592
2593 // Simulated monitors for exclusive access instructions.
2594 SimExclusiveLocalMonitor local_monitor_;
2595 SimExclusiveGlobalMonitor global_monitor_;
2596
2597 // Output stream.
2598 FILE* stream_;
2599 PrintDisassembler* print_disasm_;
2600
2601 // Instruction statistics instrumentation.
2602 Instrument* instrumentation_;
2603
2604 // General purpose registers. Register 31 is the stack pointer.
2605 SimRegister registers_[kNumberOfRegisters];
2606
2607 // Vector registers
2608 SimVRegister vregisters_[kNumberOfVRegisters];
2609
2610 // Program Status Register.
2611 // bits[31, 27]: Condition flags N, Z, C, and V.
2612 // (Negative, Zero, Carry, Overflow)
2613 SimSystemRegister nzcv_;
2614
2615 // Floating-Point Control Register
2616 SimSystemRegister fpcr_;
2617
2618 // Only a subset of FPCR features are supported by the simulator. This helper
2619 // checks that the FPCR settings are supported.
2620 //
2621 // This is checked when floating-point instructions are executed, not when
2622 // FPCR is set. This allows generated code to modify FPCR for external
2623 // functions, or to save and restore it when entering and leaving generated
2624 // code.
AssertSupportedFPCR()2625 void AssertSupportedFPCR() {
2626 VIXL_ASSERT(fpcr().FZ() == 0); // No flush-to-zero support.
2627 VIXL_ASSERT(fpcr().RMode() == FPTieEven); // Ties-to-even rounding only.
2628
2629 // The simulator does not support half-precision operations so fpcr().AHP()
2630 // is irrelevant, and is not checked here.
2631 }
2632
// Returns the N (negative) flag for `result` interpreted as a value of
// `reg_size` bits: the value of bit (reg_size - 1), as 0 or 1.
//
// reg_size must be in [1, 64]; other values make the shift undefined.
static int CalcNFlag(uint64_t result, unsigned reg_size) {
  const unsigned sign_bit = reg_size - 1;
  return static_cast<int>((result >> sign_bit) & 1);
}

// Returns the Z (zero) flag for `result`: 1 when it is zero, 0 otherwise.
static int CalcZFlag(uint64_t result) {
  return (result == 0) ? 1 : 0;
}

2641 static const uint32_t kConditionFlagsMask = 0xf0000000;
2642
2643 // Stack
2644 byte* stack_;
2645 static const int stack_protection_size_ = 256;
2646 // 2 KB stack.
2647 static const int stack_size_ = 2 * 1024 + 2 * stack_protection_size_;
2648 byte* stack_limit_;
2649
2650 Decoder* decoder_;
2651 // Indicates if the pc has been modified by the instruction and should not be
2652 // automatically incremented.
2653 bool pc_modified_;
2654 const Instruction* pc_;
2655
2656 static const char* xreg_names[];
2657 static const char* wreg_names[];
2658 static const char* sreg_names[];
2659 static const char* dreg_names[];
2660 static const char* vreg_names[];
2661
2662 static const Instruction* kEndOfSimAddress;
2663
2664 private:
2665 template <typename T>
2666 static T FPDefaultNaN();
2667
2668 // Standard NaN processing.
2669 template <typename T>
FPProcessNaN(T op)2670 T FPProcessNaN(T op) {
2671 VIXL_ASSERT(std::isnan(op));
2672 if (IsSignallingNaN(op)) {
2673 FPProcessException();
2674 }
2675 return DN() ? FPDefaultNaN<T>() : ToQuietNaN(op);
2676 }
2677
// Two-operand NaN processing.
//
// Picks the operand to pass to FPProcessNaN in this priority order:
// signalling op1, signalling op2, then any NaN op1, then any NaN op2.
// Returns 0.0 (converted to T) when neither operand is a NaN.
// NOTE(review): callers presumably test the result with std::isnan to tell
// "a NaN was handled" from "no NaN" -- confirm at the call sites.
template <typename T>
T FPProcessNaNs(T op1, T op2) {
  // Signalling NaNs take precedence, in operand order.
  if (IsSignallingNaN(op1)) return FPProcessNaN(op1);
  if (IsSignallingNaN(op2)) return FPProcessNaN(op2);

  // Any NaN still present at this point must be quiet.
  if (std::isnan(op1)) {
    VIXL_ASSERT(IsQuietNaN(op1));
    return FPProcessNaN(op1);
  }
  if (std::isnan(op2)) {
    VIXL_ASSERT(IsQuietNaN(op2));
    return FPProcessNaN(op2);
  }

  // No NaN operand.
  return static_cast<T>(0.0);
}

// Three-operand NaN processing.
//
// Picks the operand to pass to FPProcessNaN in this priority order:
// signalling op1, op2, op3, then any NaN op1, op2, op3.
// Returns 0.0 (converted to T) when no operand is a NaN.
template <typename T>
T FPProcessNaNs3(T op1, T op2, T op3) {
  // Signalling NaNs take precedence, in operand order.
  if (IsSignallingNaN(op1)) return FPProcessNaN(op1);
  if (IsSignallingNaN(op2)) return FPProcessNaN(op2);
  if (IsSignallingNaN(op3)) return FPProcessNaN(op3);

  // Any NaN still present at this point must be quiet.
  if (std::isnan(op1)) {
    VIXL_ASSERT(IsQuietNaN(op1));
    return FPProcessNaN(op1);
  }
  if (std::isnan(op2)) {
    VIXL_ASSERT(IsQuietNaN(op2));
    return FPProcessNaN(op2);
  }
  if (std::isnan(op3)) {
    VIXL_ASSERT(IsQuietNaN(op3));
    return FPProcessNaN(op3);
  }

  // No NaN operand.
  return static_cast<T>(0.0);
}

// NOTE(review): presumably selects coloured trace output -- confirm where it
// is read.
bool coloured_trace_;

// A set of TraceParameters flags.
int trace_parameters_;

// Indicates whether the instruction instrumentation is active.
bool instruction_stats_;

// Indicates whether the exclusive-access warning has been printed.
bool print_exclusive_access_warning_;
void PrintExclusiveAccessWarning();
2728 };
2729 } // namespace vixl
2730
2731 #endif // VIXL_A64_SIMULATOR_A64_H_
2732