1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
// This inline header is intended to be included into a source file that tests the correctness of
// riscv64 instruction execution by an interpreter or a JIT translator.
19 //
20 // Assumptions list:
21 //
22 // 1. Includes
23 //
24 // #include "gtest/gtest.h"
25 //
26 // #include <cstdint>
27 // #include <initializer_list>
28 // #include <tuple>
29 // #include <vector>
30 //
31 // #include "berberis/base/bit_util.h"
32 // #include "berberis/guest_state/guest_addr.h"
33 // #include "berberis/guest_state/guest_state_riscv64.h"
34 //
35 // 2. RunOneInstruction is defined and implemented
36 //
37 // 3. TESTSUITE macro is defined
38 
39 #ifndef TESTSUITE
40 #error "TESTSUITE is undefined"
41 #endif
42 
43 #if !(defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR) || \
44       defined(TESTING_HEAVY_OPTIMIZER))
45 #error "One of TESTING_INTERPRETER, TESTING_LITE_TRANSLATOR, TESTING_HEAVY_OPTIMIZER must be defined
46 #endif
47 
48 namespace {
49 
50 // TODO(b/276787675): remove these files from interpreter when they are no longer needed there.
51 // Maybe extract FPvalueToFPReg and TupleMap to a separate header?
52 inline constexpr class FPValueToFPReg {
53  public:
54   uint64_t operator()(uint64_t value) const { return value; }
55   uint64_t operator()(float value) const {
56     return bit_cast<uint32_t>(value) | 0xffff'ffff'0000'0000;
57   }
58   uint64_t operator()(double value) const { return bit_cast<uint64_t>(value); }
59 } kFPValueToFPReg;
60 
// Unit-test helper that normalizes every value of a table of test cases in one go.
//
// “container” is expected to be an iterable of tuples, e.g.
// std::initializer_list<std::tuple<…>>. “transformer” is invoked on each individual tuple
// element, so the result can be consumed like this:
//
//   for (auto& [value1, value2, value3] : TupleMap(container, [](auto value){ return …; })) {
//     …
//   }
//
// Returns a std::vector holding one tuple of transformed elements per input tuple, in input order.
template <typename ContainerType, typename Transformer>
decltype(auto) TupleMap(const ContainerType& container, const Transformer& transformer) {
  using std::begin;

  // Transforms the unpacked elements of one tuple and packs the results into a new tuple.
  auto map_one_tuple = [&transformer](auto&&... element) {
    return std::tuple{transformer(element)...};
  };

  // The element type is deduced from the first tuple without evaluating anything.
  using MappedTuple = decltype(std::apply(map_one_tuple, *begin(container)));
  std::vector<MappedTuple> mapped;

  for (const auto& source_tuple : container) {
    mapped.emplace_back(std::apply(map_one_tuple, source_tuple));
  }

  return mapped;
}
87 
88 void RaiseFeExceptForGuestFlags(uint8_t riscv_fflags) {
89   EXPECT_EQ(feclearexcept(FE_ALL_EXCEPT), 0);
90   if (riscv_fflags & FPFlags::NX) {
91     EXPECT_EQ(feraiseexcept(FE_INEXACT), 0);
92   }
93   if (riscv_fflags & FPFlags::UF) {
94     EXPECT_EQ(feraiseexcept(FE_UNDERFLOW), 0);
95   }
96   if (riscv_fflags & FPFlags::OF) {
97     EXPECT_EQ(feraiseexcept(FE_OVERFLOW), 0);
98   }
99   if (riscv_fflags & FPFlags::DZ) {
100     EXPECT_EQ(feraiseexcept(FE_DIVBYZERO), 0);
101   }
102   if (riscv_fflags & FPFlags::NV) {
103     EXPECT_EQ(feraiseexcept(FE_INVALID), 0);
104   }
105 }
106 
107 void TestFeExceptForGuestFlags(uint8_t riscv_fflags) {
108   EXPECT_EQ(bool(riscv_fflags & FPFlags::NX), bool(fetestexcept(FE_INEXACT)));
109   EXPECT_EQ(bool(riscv_fflags & FPFlags::UF), bool(fetestexcept(FE_UNDERFLOW)));
110   EXPECT_EQ(bool(riscv_fflags & FPFlags::OF), bool(fetestexcept(FE_OVERFLOW)));
111   EXPECT_EQ(bool(riscv_fflags & FPFlags::DZ), bool(fetestexcept(FE_DIVBYZERO)));
112   EXPECT_EQ(bool(riscv_fflags & FPFlags::NV), bool(fetestexcept(FE_INVALID)));
113 }
114 
115 }  // namespace
116 
117 class TESTSUITE : public ::testing::Test {
118  public:
119   TESTSUITE()
120       : state_{
121             .cpu = {.vtype = uint64_t{1} << 63, .frm = intrinsics::GuestModeFromHostRounding()}} {}
122 
123   template <uint8_t kInsnSize = 4>
124   void RunInstruction(const uint32_t& insn_bytes) {
125     state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
126     EXPECT_TRUE(RunOneInstruction<kInsnSize>(&state_, state_.cpu.insn_addr + kInsnSize));
127   }
128 
129   // Compressed Instructions.
130 
131   template <RegisterType register_type, uint64_t expected_result, uint8_t kTargetReg>
132   void TestCompressedStore(uint16_t insn_bytes, uint64_t offset) {
133     store_area_ = 0;
134     SetXReg<kTargetReg>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&store_area_) - offset));
135     SetReg<register_type, 9>(state_.cpu, kDataToLoad);
136     RunInstruction<2>(insn_bytes);
137     EXPECT_EQ(store_area_, expected_result);
138   }
139 
140   template <RegisterType register_type, uint64_t expected_result, uint8_t kSourceReg>
141   void TestCompressedLoad(uint16_t insn_bytes, uint64_t offset) {
142     SetXReg<kSourceReg>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - offset));
143     RunInstruction<2>(insn_bytes);
144     EXPECT_EQ((GetReg<register_type, 9>(state_.cpu)), expected_result);
145   }
146 
147   void TestCAddi(uint16_t insn_bytes, uint64_t expected_increment) {
148     SetXReg<2>(state_.cpu, 1);
149     RunInstruction<2>(insn_bytes);
150     EXPECT_EQ(GetXReg<2>(state_.cpu), 1 + expected_increment);
151   }
152 
153   void TestCAddi16sp(uint16_t insn_bytes, uint64_t expected_offset) {
154     SetXReg<2>(state_.cpu, 1);
155     RunInstruction<2>(insn_bytes);
156     EXPECT_EQ(GetXReg<2>(state_.cpu), 1 + expected_offset);
157   }
158 
159   void TestLi(uint32_t insn_bytes, uint64_t expected_result) {
160     RunInstruction<2>(insn_bytes);
161     EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
162   }
163 
164   void TestCAddi4spn(uint16_t insn_bytes, uint64_t expected_offset) {
165     SetXReg<2>(state_.cpu, 1);
166     RunInstruction<2>(insn_bytes);
167     EXPECT_EQ(GetXReg<9>(state_.cpu), 1 + expected_offset);
168   }
169 
170   void TestCBeqzBnez(uint16_t insn_bytes, uint64_t value, int16_t expected_offset) {
171     auto code_start = ToGuestAddr(&insn_bytes);
172     state_.cpu.insn_addr = code_start;
173     if (expected_offset == 0) {
174       // Emit pending signal so we don't get stuck in an infinite loop.
175       state_.pending_signals_status = kPendingSignalsPresent;
176     } else {
177       state_.pending_signals_status = kPendingSignalsDisabled;
178     }
179     SetXReg<9>(state_.cpu, value);
180     EXPECT_TRUE(RunOneInstruction<2>(&state_, state_.cpu.insn_addr + expected_offset));
181     EXPECT_EQ(state_.cpu.insn_addr, code_start + expected_offset);
182   }
183 
184   void TestCMiscAlu(uint16_t insn_bytes,
185                     std::initializer_list<std::tuple<uint64_t, uint64_t, uint64_t>> args) {
186     for (auto [arg1, arg2, expected_result] : args) {
187       SetXReg<8>(state_.cpu, arg1);
188       SetXReg<9>(state_.cpu, arg2);
189       RunInstruction<2>(insn_bytes);
190       EXPECT_EQ(GetXReg<8>(state_.cpu), expected_result);
191     }
192   }
193 
194   void TestCMiscAluImm(uint16_t insn_bytes, uint64_t value, uint64_t expected_result) {
195     SetXReg<9>(state_.cpu, value);
196     RunInstruction<2>(insn_bytes);
197     EXPECT_EQ(GetXReg<9>(state_.cpu), expected_result);
198   }
199 
200   void TestCJ(uint16_t insn_bytes, int16_t expected_offset) {
201     auto code_start = ToGuestAddr(&insn_bytes);
202     state_.cpu.insn_addr = code_start;
203     if (expected_offset == 0) {
204       // Emit pending signal so we don't get stuck in an infinite loop.
205       state_.pending_signals_status = kPendingSignalsPresent;
206     } else {
207       state_.pending_signals_status = kPendingSignalsDisabled;
208     }
209     EXPECT_TRUE(RunOneInstruction<2>(&state_, state_.cpu.insn_addr + expected_offset));
210     EXPECT_EQ(state_.cpu.insn_addr, code_start + expected_offset);
211   }
212 
213   void TestCOp(uint32_t insn_bytes,
214                std::initializer_list<std::tuple<uint64_t, uint64_t, uint64_t>> args) {
215     for (auto [arg1, arg2, expected_result] : args) {
216       SetXReg<1>(state_.cpu, arg1);
217       SetXReg<2>(state_.cpu, arg2);
218       RunInstruction<2>(insn_bytes);
219       EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
220     }
221   }
222 
223   // Non-Compressed Instructions.
224 
225   void TestFFlagsOnGuestAndHost(uint8_t expected_guest_fflags) {
226     // Read fflags register.
227     RunInstruction(0x00102173);  // frflags x2
228     EXPECT_EQ(GetXReg<2>(state_.cpu), expected_guest_fflags);
229 
230     // Check corresponding fenv exception flags on host.
231     TestFeExceptForGuestFlags(expected_guest_fflags);
232   }
233 
234   void TestFCsr(uint32_t insn_bytes,
235                 uint8_t fcsr_to_set,
236                 uint8_t expected_fcsr,
237                 uint8_t expected_cpustate_frm) {
238     state_.cpu.frm =
239         0b100u;  // Pass non-zero frm to ensure that we don't accidentally rely on it being zero.
240     SetXReg<3>(state_.cpu, fcsr_to_set);
241     RunInstruction(insn_bytes);
242     EXPECT_EQ(GetXReg<2>(state_.cpu), 0b1000'0000ULL | expected_fcsr);
243     EXPECT_EQ(state_.cpu.frm, expected_cpustate_frm);
244   }
245 
246   void TestFrm(uint32_t insn_bytes, uint8_t frm_to_set, uint8_t expected_rm) {
247     state_.cpu.frm = 0b001u;
248     SetXReg<3>(state_.cpu, frm_to_set);
249     RunInstruction(insn_bytes);
250     EXPECT_EQ(GetXReg<2>(state_.cpu), 0b001u);
251     EXPECT_EQ(state_.cpu.frm, expected_rm);
252   }
253 
254   void TestOp(uint32_t insn_bytes,
255               std::initializer_list<std::tuple<uint64_t, uint64_t, uint64_t>> args) {
256     for (auto [arg1, arg2, expected_result] : args) {
257       SetXReg<2>(state_.cpu, arg1);
258       SetXReg<3>(state_.cpu, arg2);
259       RunInstruction(insn_bytes);
260       EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
261     }
262   }
263 
264   template <typename... Types>
265   void TestOpFp(uint32_t insn_bytes, std::initializer_list<std::tuple<Types...>> args) {
266     for (auto [arg1, arg2, expected_result] : TupleMap(args, kFPValueToFPReg)) {
267       SetFReg<2>(state_.cpu, arg1);
268       SetFReg<3>(state_.cpu, arg2);
269       RunInstruction(insn_bytes);
270       EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result);
271     }
272   }
273 
274   void TestOpImm(uint32_t insn_bytes,
275                  std::initializer_list<std::tuple<uint64_t, uint16_t, uint64_t>> args) {
276     for (auto [arg1, imm, expected_result] : args) {
277       CHECK_LE(imm, 63);
278       uint32_t insn_bytes_with_immediate = insn_bytes | imm << 20;
279       SetXReg<2>(state_.cpu, arg1);
280       RunInstruction(insn_bytes_with_immediate);
281       EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
282     }
283   }
284 
285   void TestAuipc(uint32_t insn_bytes, uint64_t expected_offset) {
286     RunInstruction(insn_bytes);
287     EXPECT_EQ(GetXReg<1>(state_.cpu), expected_offset + ToGuestAddr(&insn_bytes));
288   }
289 
290   void TestLui(uint32_t insn_bytes, uint64_t expected_result) {
291     RunInstruction(insn_bytes);
292     EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
293   }
294 
295   void TestBranch(uint32_t insn_bytes,
296                   std::initializer_list<std::tuple<uint64_t, uint64_t, int8_t>> args) {
297     auto code_start = ToGuestAddr(&insn_bytes);
298     for (auto [arg1, arg2, expected_offset] : args) {
299       state_.cpu.insn_addr = code_start;
300       SetXReg<1>(state_.cpu, arg1);
301       SetXReg<2>(state_.cpu, arg2);
302       EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + expected_offset));
303       EXPECT_EQ(state_.cpu.insn_addr, code_start + expected_offset);
304     }
305   }
306 
307   void TestJumpAndLink(uint32_t insn_bytes, int8_t expected_offset) {
308     auto code_start = ToGuestAddr(&insn_bytes);
309     state_.cpu.insn_addr = code_start;
310     EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + expected_offset));
311     EXPECT_EQ(state_.cpu.insn_addr, code_start + expected_offset);
312     EXPECT_EQ(GetXReg<1>(state_.cpu), code_start + 4);
313   }
314 
315   void TestLoad(uint32_t insn_bytes, uint64_t expected_result) {
316     // Offset is always 8.
317     SetXReg<2>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - 8));
318     RunInstruction(insn_bytes);
319     EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
320   }
321 
322   // kLinkRegisterOffsetIfUsed is size of instruction or 0 if instruction does not link register.
323   template <uint8_t kLinkRegisterOffsetIfUsed>
324   void TestJumpAndLinkRegister(uint32_t insn_bytes, uint64_t base_disp, int64_t expected_offset) {
325     auto code_start = ToGuestAddr(&insn_bytes);
326     state_.cpu.insn_addr = code_start;
327     SetXReg<1>(state_.cpu, 0);
328     SetXReg<2>(state_.cpu, code_start + base_disp);
329     EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + expected_offset));
330     EXPECT_EQ(state_.cpu.insn_addr, code_start + expected_offset);
331     if constexpr (kLinkRegisterOffsetIfUsed == 0) {
332       EXPECT_EQ(GetXReg<1>(state_.cpu), 0UL);
333     } else {
334       EXPECT_EQ(GetXReg<1>(state_.cpu), code_start + kLinkRegisterOffsetIfUsed);
335     }
336   }
337 
338   void TestStore(uint32_t insn_bytes, uint64_t expected_result) {
339     // Offset is always 8.
340     SetXReg<1>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&store_area_) - 8));
341     SetXReg<2>(state_.cpu, kDataToStore);
342     store_area_ = 0;
343     RunInstruction(insn_bytes);
344     EXPECT_EQ(store_area_, expected_result);
345   }
346 
347   template <typename... Types>
348   void TestFma(uint32_t insn_bytes, std::initializer_list<std::tuple<Types...>> args) {
349     for (auto [arg1, arg2, arg3, expected_result] : TupleMap(args, kFPValueToFPReg)) {
350       SetFReg<2>(state_.cpu, arg1);
351       SetFReg<3>(state_.cpu, arg2);
352       SetFReg<4>(state_.cpu, arg3);
353       RunInstruction(insn_bytes);
354       EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result);
355     }
356   }
357 
358 #if (defined(TESTING_INTERPRETER) || defined(TESTING_HEAVY_OPTIMIZER))
359 
360   void TestAtomicLoad(uint32_t insn_bytes,
361                       const uint64_t* const data_to_load,
362                       uint64_t expected_result) {
363     state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
364     SetXReg<1>(state_.cpu, ToGuestAddr(data_to_load));
365     EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
366     EXPECT_EQ(GetXReg<2>(state_.cpu), expected_result);
367     EXPECT_EQ(state_.cpu.reservation_address, ToGuestAddr(data_to_load));
368     // We always reserve the full 64-bit range of the reservation address.
369     EXPECT_EQ(state_.cpu.reservation_value, *data_to_load);
370   }
371 
372   template <typename T>
373   void TestAtomicStore(uint32_t insn_bytes, T expected_result) {
374     store_area_ = ~uint64_t{0};
375     state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
376     SetXReg<1>(state_.cpu, ToGuestAddr(&store_area_));
377     SetXReg<2>(state_.cpu, kDataToStore);
378     SetXReg<3>(state_.cpu, 0xdeadbeef);
379     state_.cpu.reservation_address = ToGuestAddr(&store_area_);
380     state_.cpu.reservation_value = store_area_;
381     MemoryRegionReservation::SetOwner(ToGuestAddr(&store_area_), &state_.cpu);
382     EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
383     EXPECT_EQ(static_cast<T>(store_area_), expected_result);
384     EXPECT_EQ(GetXReg<3>(state_.cpu), 0u);
385   }
386 
387   void TestAtomicStoreNoLoadFailure(uint32_t insn_bytes) {
388     state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
389     SetXReg<1>(state_.cpu, ToGuestAddr(&store_area_));
390     SetXReg<2>(state_.cpu, kDataToStore);
391     SetXReg<3>(state_.cpu, 0xdeadbeef);
392     store_area_ = 0;
393     EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
394     EXPECT_EQ(store_area_, 0u);
395     EXPECT_EQ(GetXReg<3>(state_.cpu), 1u);
396   }
397 
398   void TestAtomicStoreDifferentLoadFailure(uint32_t insn_bytes) {
399     state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
400     SetXReg<1>(state_.cpu, ToGuestAddr(&store_area_));
401     SetXReg<2>(state_.cpu, kDataToStore);
402     SetXReg<3>(state_.cpu, 0xdeadbeef);
403     state_.cpu.reservation_address = ToGuestAddr(&kDataToStore);
404     state_.cpu.reservation_value = 0;
405     MemoryRegionReservation::SetOwner(ToGuestAddr(&kDataToStore), &state_.cpu);
406     store_area_ = 0;
407     EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
408     EXPECT_EQ(store_area_, 0u);
409     EXPECT_EQ(GetXReg<3>(state_.cpu), 1u);
410   }
411 
412 #endif  // (defined(TESTING_INTERPRETER) || defined(TESTING_HEAVY_OPTIMIZER))
413 
414   void TestAmo(uint32_t insn_bytes,
415                uint64_t arg1,
416                uint64_t arg2,
417                uint64_t expected_result,
418                uint64_t expected_memory) {
419     // Copy arg1 into store_area_
420     store_area_ = arg1;
421     SetXReg<2>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&store_area_)));
422     SetXReg<3>(state_.cpu, arg2);
423     RunInstruction(insn_bytes);
424     EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
425     EXPECT_EQ(store_area_, expected_memory);
426   }
427 
428   void TestAmo(uint32_t insn_bytes32, uint32_t insn_bytes64, uint64_t expected_memory) {
429     TestAmo(insn_bytes32,
430             0xffff'eeee'dddd'ccccULL,
431             0xaaaa'bbbb'cccc'ddddULL,
432             0xffff'ffff'dddd'ccccULL,
433             0xffff'eeee'0000'0000 | uint32_t(expected_memory));
434     TestAmo(insn_bytes64,
435             0xffff'eeee'dddd'ccccULL,
436             0xaaaa'bbbb'cccc'ddddULL,
437             0xffff'eeee'dddd'ccccULL,
438             expected_memory);
439   }
440 
441   template <typename... Types>
442   void TestFmvFloatToInteger(uint32_t insn_bytes,
443                              std::initializer_list<std::tuple<Types...>> args) {
444     for (auto [arg, expected_result] : TupleMap(args, kFPValueToFPReg)) {
445       SetFReg<1>(state_.cpu, arg);
446       RunInstruction(insn_bytes);
447       EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
448     }
449   }
450 
451   template <typename... Types>
452   void TestFmvIntegerToFloat(uint32_t insn_bytes,
453                              std::initializer_list<std::tuple<Types...>> args) {
454     for (auto [arg, expected_result] : args) {
455       SetXReg<1>(state_.cpu, arg);
456       RunInstruction(insn_bytes);
457       EXPECT_EQ(GetFReg<1>(state_.cpu), kFPValueToFPReg(expected_result));
458     }
459   }
460 
461   template <typename... Types>
462   void TestOpFpGpRegisterTarget(uint32_t insn_bytes,
463                                 std::initializer_list<std::tuple<Types...>> args) {
464     for (auto [arg1, arg2, expected_result] : TupleMap(args, kFPValueToFPReg)) {
465       SetFReg<2>(state_.cpu, arg1);
466       SetFReg<3>(state_.cpu, arg2);
467       RunInstruction(insn_bytes);
468       EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
469     }
470   }
471 
472   template <typename... Types>
473   void TestOpFpGpRegisterTargetSingleInput(uint32_t insn_bytes,
474                                            std::initializer_list<std::tuple<Types...>> args) {
475     for (auto [arg, expected_result] : TupleMap(args, kFPValueToFPReg)) {
476       SetFReg<2>(state_.cpu, arg);
477       RunInstruction(insn_bytes);
478       EXPECT_EQ(GetXReg<1>(state_.cpu), expected_result);
479     }
480   }
481 
482   template <typename... Types>
483   void TestOpFpGpRegisterSourceSingleInput(uint32_t insn_bytes,
484                                            std::initializer_list<std::tuple<Types...>> args) {
485     for (auto [arg, expected_result] : TupleMap(args, kFPValueToFPReg)) {
486       SetXReg<2>(state_.cpu, arg);
487       RunInstruction(insn_bytes);
488       EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result);
489     }
490   }
491 
492   template <typename... Types>
493   void TestOpFpSingleInput(uint32_t insn_bytes, std::initializer_list<std::tuple<Types...>> args) {
494     for (auto [arg, expected_result] : TupleMap(args, kFPValueToFPReg)) {
495       SetFReg<2>(state_.cpu, arg);
496       RunInstruction(insn_bytes);
497       EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result);
498     }
499   }
500 
501   void TestLoadFp(uint32_t insn_bytes, uint64_t expected_result) {
502     // Offset is always 8.
503     SetXReg<2>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&kDataToLoad) - 8));
504     RunInstruction(insn_bytes);
505     EXPECT_EQ(GetFReg<1>(state_.cpu), expected_result);
506   }
507 
508   void TestStoreFp(uint32_t insn_bytes, uint64_t expected_result) {
509     // Offset is always 8.
510     SetXReg<1>(state_.cpu, ToGuestAddr(bit_cast<uint8_t*>(&store_area_) - 8));
511     SetFReg<2>(state_.cpu, kDataToStore);
512     store_area_ = 0;
513     RunInstruction(insn_bytes);
514     EXPECT_EQ(store_area_, expected_result);
515   }
516 
517   void TestVsetvl(
518       uint32_t insn_bytes,
519       std::initializer_list<std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t>>
520           args) {
521     for (auto [vl_orig, vtype_orig, avl, vtype_new, vl_expected, vtype_expected] : args) {
522       state_.cpu.vl = vl_orig;
523       state_.cpu.vtype = vtype_orig;
524       SetXReg<1>(state_.cpu, ~0ULL);
525       SetXReg<2>(state_.cpu, avl);
526       SetXReg<3>(state_.cpu, vtype_new);
527       RunInstruction(insn_bytes);
528       if (insn_bytes & 0b11111'0000000) {
529         EXPECT_EQ(GetXReg<1>(state_.cpu), vl_expected);
530       } else {
531         EXPECT_EQ(GetXReg<1>(state_.cpu), ~0ULL);
532       }
533       EXPECT_EQ(state_.cpu.vl, vl_expected);
534       EXPECT_EQ(state_.cpu.vtype, vtype_expected);
535     }
536   }
537 
538  protected:
539   static constexpr uint64_t kDataToLoad{0xffffeeeeddddccccULL};
540   static constexpr uint64_t kDataToStore = kDataToLoad;
541   uint64_t store_area_;
542   ThreadState state_;
543 };
544 
545 // Tests for Compressed Instructions.
546 template <uint16_t opcode, auto execute_instruction_func>
547 void TestCompressedLoadOrStore32bit(TESTSUITE* that) {
548   union {
549     uint16_t offset;
550     struct [[gnu::packed]] {
551       uint8_t : 2;
552       uint8_t i2 : 1;
553       uint8_t i3_i5 : 3;
554       uint8_t i6 : 1;
555     } i_bits;
556   };
557   for (offset = uint8_t{0}; offset < uint8_t{128}; offset += 4) {
558     union {
559       int16_t parcel;
560       struct [[gnu::packed]] {
561         uint8_t low_opcode : 2;
562         uint8_t rd : 3;
563         uint8_t i6 : 1;
564         uint8_t i2 : 1;
565         uint8_t rs : 3;
566         uint8_t i3_i5 : 3;
567         uint8_t high_opcode : 3;
568       } __attribute__((__packed__));
569     } o_bits = {
570         .low_opcode = 0b00,
571         .rd = 1,
572         .i6 = i_bits.i6,
573         .i2 = i_bits.i2,
574         .rs = 0,
575         .i3_i5 = i_bits.i3_i5,
576         .high_opcode = 0b000,
577     };
578     (that->*execute_instruction_func)(o_bits.parcel | opcode, offset);
579   }
580 }
581 
582 TEST_F(TESTSUITE, CompressedLoadAndStores32bit) {
583   // c.Lw
584   TestCompressedLoadOrStore32bit<
585       0b010'000'000'00'000'00,
586       &TESTSUITE::TestCompressedLoad<RegisterType::kReg,
587                                      static_cast<uint64_t>(static_cast<int32_t>(kDataToLoad)),
588                                      8>>(this);
589   // c.Sw
590   TestCompressedLoadOrStore32bit<
591       0b110'000'000'00'000'00,
592       &TESTSUITE::TestCompressedStore<RegisterType::kReg,
593                                       static_cast<uint64_t>(static_cast<uint32_t>(kDataToLoad)),
594                                       8>>(this);
595 }
596 
597 template <uint16_t opcode, auto execute_instruction_func>
598 void TestCompressedLoadOrStore64bit(TESTSUITE* that) {
599   union {
600     uint16_t offset;
601     struct [[gnu::packed]] {
602       uint8_t : 3;
603       uint8_t i3_i5 : 3;
604       uint8_t i6_i7 : 2;
605     } i_bits;
606   };
607   for (offset = int16_t{0}; offset < int16_t{256}; offset += 8) {
608     union {
609       int16_t parcel;
610       struct [[gnu::packed]] {
611         uint8_t low_opcode : 2;
612         uint8_t rd : 3;
613         uint8_t i6_i7 : 2;
614         uint8_t rs : 3;
615         uint8_t i3_i5 : 3;
616         uint8_t high_opcode : 3;
617       };
618     } o_bits = {
619         .low_opcode = 0b00,
620         .rd = 1,
621         .i6_i7 = i_bits.i6_i7,
622         .rs = 0,
623         .i3_i5 = i_bits.i3_i5,
624         .high_opcode = 0b000,
625     };
626     (that->*execute_instruction_func)(o_bits.parcel | opcode, offset);
627   }
628 }
629 
630 TEST_F(TESTSUITE, CompressedLoadAndStores) {
631   // c.Ld
632   TestCompressedLoadOrStore64bit<
633       0b011'000'000'00'000'00,
634       &TESTSUITE::TestCompressedLoad<RegisterType::kReg, kDataToLoad, 8>>(this);
635   // c.Sd
636   TestCompressedLoadOrStore64bit<
637       0b111'000'000'00'000'00,
638       &TESTSUITE::TestCompressedStore<RegisterType::kReg, kDataToLoad, 8>>(this);
639   // c.Fld
640   TestCompressedLoadOrStore64bit<
641       0b001'000'000'00'000'00,
642       &TESTSUITE::TestCompressedLoad<RegisterType::kFpReg, kDataToLoad, 8>>(this);
643   // c.Fsd
644   TestCompressedLoadOrStore64bit<
645       0b101'000'000'00'000'00,
646       &TESTSUITE::TestCompressedStore<RegisterType::kFpReg, kDataToLoad, 8>>(this);
647 }
648 
649 TEST_F(TESTSUITE, TestCompressedStore32bitsp) {
650   union {
651     uint16_t offset;
652     struct [[gnu::packed]] {
653       uint8_t : 2;
654       uint8_t i2_i5 : 4;
655       uint8_t i6_i7 : 2;
656     } i_bits;
657   };
658   for (offset = uint16_t{0}; offset < uint16_t{256}; offset += 4) {
659     union {
660       int16_t parcel;
661       struct [[gnu::packed]] {
662         uint8_t low_opcode : 2;
663         uint8_t rs2 : 5;
664         uint8_t i6_i7 : 2;
665         uint8_t i2_i5 : 4;
666         uint8_t high_opcode : 3;
667       };
668     } o_bits = {
669         .low_opcode = 0b10,
670         .rs2 = 9,
671         .i6_i7 = i_bits.i6_i7,
672         .i2_i5 = i_bits.i2_i5,
673         .high_opcode = 0b110,
674     };
675     // c.Swsp
676     TestCompressedStore<RegisterType::kReg,
677                         static_cast<uint64_t>(static_cast<uint32_t>(kDataToStore)),
678                         2>(o_bits.parcel, offset);
679   }
680 }
681 
682 template <uint16_t opcode, auto execute_instruction_func>
683 void TestCompressedStore64bitsp(TESTSUITE* that) {
684   union {
685     uint16_t offset;
686     struct [[gnu::packed]] {
687       uint8_t : 3;
688       uint8_t i3_i5 : 3;
689       uint8_t i6_i8 : 3;
690     } i_bits;
691   };
692   for (offset = uint16_t{0}; offset < uint16_t{512}; offset += 8) {
693     union {
694       int16_t parcel;
695       struct [[gnu::packed]] {
696         uint8_t low_opcode : 2;
697         uint8_t rs2 : 5;
698         uint8_t i6_i8 : 3;
699         uint8_t i3_i5 : 3;
700         uint8_t high_opcode : 3;
701       };
702     } o_bits = {
703         .low_opcode = 0b10,
704         .rs2 = 9,
705         .i6_i8 = i_bits.i6_i8,
706         .i3_i5 = i_bits.i3_i5,
707         .high_opcode = 0b101,
708     };
709     (that->*execute_instruction_func)(o_bits.parcel | opcode, offset);
710   }
711 }
712 
713 TEST_F(TESTSUITE, TestCompressedStore64bitsp) {
714   // c.Sdsp
715   TestCompressedStore64bitsp<0b011'000'000'00'000'00,
716                              &TESTSUITE::TestCompressedStore<RegisterType::kReg, kDataToStore, 2>>(
717       this);
718   // c.Fsdsp
719   TestCompressedStore64bitsp<
720       0b001'000'000'00'000'00,
721       &TESTSUITE::TestCompressedStore<RegisterType::kFpReg, kDataToStore, 2>>(this);
722 }
723 
724 TEST_F(TESTSUITE, TestCompressedLoad32bitsp) {
725   union {
726     uint16_t offset;
727     struct [[gnu::packed]] {
728       uint8_t : 2;
729       uint8_t i2_i4 : 3;
730       uint8_t i5 : 1;
731       uint8_t i6_i7 : 2;
732     } i_bits;
733   };
734   for (offset = uint16_t{0}; offset < uint16_t{256}; offset += 4) {
735     union {
736       int16_t parcel;
737       struct [[gnu::packed]] {
738         uint8_t low_opcode : 2;
739         uint8_t i6_i7 : 2;
740         uint8_t i2_i4 : 3;
741         uint8_t rd : 5;
742         uint8_t i5 : 1;
743         uint8_t high_opcode : 3;
744       };
745     } o_bits = {
746         .low_opcode = 0b10,
747         .i6_i7 = i_bits.i6_i7,
748         .i2_i4 = i_bits.i2_i4,
749         .rd = 9,
750         .i5 = i_bits.i5,
751         .high_opcode = 0b010,
752     };
753     // c.Lwsp
754     TestCompressedLoad<RegisterType::kReg,
755                        static_cast<uint64_t>(static_cast<int32_t>(kDataToLoad)),
756                        2>(o_bits.parcel, offset);
757   }
758 }
759 
760 template <uint16_t opcode, auto execute_instruction_func>
761 void TestCompressedLoad64bitsp(TESTSUITE* that) {
762   union {
763     uint16_t offset;
764     struct [[gnu::packed]] {
765       uint8_t : 3;
766       uint8_t i3_i4 : 2;
767       uint8_t i5 : 1;
768       uint8_t i6_i8 : 3;
769     } i_bits;
770   };
771   for (offset = uint16_t{0}; offset < uint16_t{512}; offset += 8) {
772     union {
773       int16_t parcel;
774       struct [[gnu::packed]] {
775         uint8_t low_opcode : 2;
776         uint8_t i6_i8 : 3;
777         uint8_t i3_i4 : 2;
778         uint8_t rd : 5;
779         uint8_t i5 : 1;
780         uint8_t high_opcode : 3;
781       };
782     } o_bits = {
783         .low_opcode = 0b10,
784         .i6_i8 = i_bits.i6_i8,
785         .i3_i4 = i_bits.i3_i4,
786         .rd = 9,
787         .i5 = i_bits.i5,
788         .high_opcode = 0b001,
789     };
790     (that->*execute_instruction_func)(o_bits.parcel | opcode, offset);
791   }
792 }
793 
794 TEST_F(TESTSUITE, TestCompressedLoad64bitsp) {
795   // c.Ldsp
796   TestCompressedLoad64bitsp<0b011'000'000'00'000'00,
797                             &TESTSUITE::TestCompressedLoad<RegisterType::kReg, kDataToLoad, 2>>(
798       this);
799   // c.Fldsp
800   TestCompressedLoad64bitsp<0b001'000'000'00'000'00,
801                             &TESTSUITE::TestCompressedLoad<RegisterType::kFpReg, kDataToLoad, 2>>(
802       this);
803 }
804 
805 TEST_F(TESTSUITE, CAddi) {
806   union {
807     int8_t offset;
808     struct [[gnu::packed]] {
809       uint8_t i4_i0 : 5;
810       uint8_t i5 : 1;
811     } i_bits;
812   };
813   for (offset = int8_t{-32}; offset < int8_t{31}; offset++) {
814     union {
815       int16_t parcel;
816       struct [[gnu::packed]] {
817         uint8_t low_opcode : 2;
818         uint8_t i4_i0 : 5;
819         uint8_t r : 5;
820         uint8_t i5 : 1;
821         uint8_t high_opcode : 3;
822       } __attribute__((__packed__));
823     } o_bits = {
824         .low_opcode = 0,
825         .i4_i0 = i_bits.i4_i0,
826         .r = 2,
827         .i5 = i_bits.i5,
828         .high_opcode = 0,
829     };
830     // c.Addi
831     TestCAddi(o_bits.parcel | 0b0000'0000'0000'0001, offset);
832     // c.Addiw
833     TestCAddi(o_bits.parcel | 0b0010'0000'0000'0001, offset);
834   }
835 }
836 
837 TEST_F(TESTSUITE, CAddi16sp) {
838   union {
839     int16_t offset;
840     struct [[gnu::packed]] {
841       uint8_t : 4;
842       uint8_t i4 : 1;
843       uint8_t i5 : 1;
844       uint8_t i6 : 1;
845       uint8_t i7 : 1;
846       uint8_t i8 : 1;
847       uint8_t i9 : 1;
848     } i_bits;
849   };
850   for (offset = int16_t{-512}; offset < int16_t{512}; offset += 16) {
851     union {
852       int16_t parcel;
853       struct [[gnu::packed]] {
854         uint8_t low_opcode : 2;
855         uint8_t i5 : 1;
856         uint8_t i7 : 1;
857         uint8_t i8 : 1;
858         uint8_t i6 : 1;
859         uint8_t i4 : 1;
860         uint8_t rd : 5;
861         uint8_t i9 : 1;
862         uint8_t high_opcode : 3;
863       };
864     } o_bits = {
865         .low_opcode = 0b01,
866         .i5 = i_bits.i5,
867         .i7 = i_bits.i7,
868         .i8 = i_bits.i8,
869         .i6 = i_bits.i6,
870         .i4 = i_bits.i4,
871         .rd = 2,
872         .i9 = i_bits.i9,
873         .high_opcode = 0b011,
874     };
875     TestCAddi16sp(o_bits.parcel, offset);
876   }
877 }
878 
879 TEST_F(TESTSUITE, CLui) {
880   union {
881     int32_t offset;
882     struct [[gnu::packed]] {
883       uint8_t : 12;
884       uint8_t i12_i16 : 5;
885       uint8_t i17 : 1;
886     } i_bits;
887   };
888   for (offset = int32_t{-131072}; offset < int32_t{131072}; offset += 4096) {
889     union {
890       int16_t parcel;
891       struct [[gnu::packed]] {
892         uint8_t low_opcode : 2;
893         uint8_t i12_i16 : 5;
894         uint8_t rd : 5;
895         uint8_t i17 : 1;
896         uint8_t high_opcode : 3;
897       };
898     } o_bits = {
899         .low_opcode = 0b01,
900         .i12_i16 = i_bits.i12_i16,
901         .rd = 1,
902         .i17 = i_bits.i17,
903         .high_opcode = 0b011,
904     };
905     TestLi(o_bits.parcel, offset);
906   }
907 }
908 
909 TEST_F(TESTSUITE, CLi) {
910   union {
911     int8_t offset;
912     struct [[gnu::packed]] {
913       uint8_t i0_i4 : 5;
914       uint8_t i5 : 1;
915     } i_bits;
916   };
917   for (offset = int8_t{-32}; offset < int8_t{32}; offset++) {
918     union {
919       int16_t parcel;
920       struct [[gnu::packed]] {
921         uint8_t low_opcode : 2;
922         uint8_t i0_i4 : 5;
923         uint8_t rd : 5;
924         uint8_t i5 : 1;
925         uint8_t high_opcode : 3;
926       };
927     } o_bits = {
928         .low_opcode = 0b01,
929         .i0_i4 = i_bits.i0_i4,
930         .rd = 1,
931         .i5 = i_bits.i5,
932         .high_opcode = 0b010,
933     };
934     TestLi(o_bits.parcel, offset);
935   }
936 }
937 
938 TEST_F(TESTSUITE, CAddi4spn) {
939   union {
940     int16_t offset;
941     struct [[gnu::packed]] {
942       uint8_t : 2;
943       uint8_t i2 : 1;
944       uint8_t i3 : 1;
945       uint8_t i4 : 1;
946       uint8_t i5 : 1;
947       uint8_t i6 : 1;
948       uint8_t i7 : 1;
949       uint8_t i8 : 1;
950       uint8_t i9 : 1;
951     } i_bits;
952   };
953   for (offset = int16_t{4}; offset < int16_t{1024}; offset += 4) {
954     union {
955       int16_t parcel;
956       struct [[gnu::packed]] {
957         uint8_t low_opcode : 2;
958         uint8_t rd : 3;
959         uint8_t i3 : 1;
960         uint8_t i2 : 1;
961         uint8_t i6 : 1;
962         uint8_t i7 : 1;
963         uint8_t i8 : 1;
964         uint8_t i9 : 1;
965         uint8_t i4 : 1;
966         uint8_t i5 : 1;
967         uint8_t high_opcode : 3;
968       };
969     } o_bits = {
970         .low_opcode = 0b00,
971         .rd = 1,
972         .i3 = i_bits.i3,
973         .i2 = i_bits.i2,
974         .i6 = i_bits.i6,
975         .i7 = i_bits.i7,
976         .i8 = i_bits.i8,
977         .i9 = i_bits.i9,
978         .i4 = i_bits.i4,
979         .i5 = i_bits.i5,
980         .high_opcode = 0b000,
981     };
982     TestCAddi4spn(o_bits.parcel, offset);
983   }
984 }
985 
// Exercises c.Beqz and c.Bnez over the branch offsets -256, -248, ..., 248. For every offset the
// immediate bits are scattered into a parcel skeleton and the opcode bits are ORed in per
// instruction before calling TestCBeqzBnez.
TEST_F(TESTSUITE, CBeqzBnez) {
  // View of the offset that exposes bits 1..8 one by one (bit 0 is skipped).
  union {
    int16_t offset;
    struct [[gnu::packed]] {
      uint8_t : 1;
      uint8_t i1 : 1;
      uint8_t i2 : 1;
      uint8_t i3 : 1;
      uint8_t i4 : 1;
      uint8_t i5 : 1;
      uint8_t i6 : 1;
      uint8_t i7 : 1;
      uint8_t i8 : 1;
    } i_bits;
  };
  // NOTE(review): with a step of 8 the i1 and i2 bits are always zero in this loop.
  for (offset = int16_t{-256}; offset < int16_t{256}; offset += 8) {
    // Parcel skeleton. The declaration order of the fields defines where each immediate bit
    // lands inside the 16-bit parcel, so it must not be rearranged.
    union {
      int16_t parcel;
      struct [[gnu::packed]] {
        uint8_t low_opcode : 2;
        uint8_t i5 : 1;
        uint8_t i1 : 1;
        uint8_t i2 : 1;
        uint8_t i6 : 1;
        uint8_t i7 : 1;
        uint8_t rs : 3;
        uint8_t i3 : 1;
        uint8_t i4 : 1;
        uint8_t i8 : 1;
        uint8_t high_opcode : 3;
      };
    } o_bits = {
        .low_opcode = 0,
        .i5 = i_bits.i5,
        .i1 = i_bits.i1,
        .i2 = i_bits.i2,
        .i6 = i_bits.i6,
        .i7 = i_bits.i7,
        .rs = 1,
        .i3 = i_bits.i3,
        .i4 = i_bits.i4,
        .i8 = i_bits.i8,
        .high_opcode = 0,
    };
    // Opcode bits are left zero above and supplied here.
    // c.Beqz (second argument 0) - presumably the register value that makes the branch taken;
    // confirm against TestCBeqzBnez.
    TestCBeqzBnez(o_bits.parcel | 0b1100'0000'0000'0001, 0, offset);
    // c.Bnez (second argument 1).
    TestCBeqzBnez(o_bits.parcel | 0b1110'0000'0000'0001, 1, offset);
  }
}
1034 
1035 TEST_F(TESTSUITE, CMiscAluInstructions) {
1036   // c.Sub
1037   TestCMiscAlu(0x8c05, {{42, 23, 19}});
1038   // c.Xor
1039   TestCMiscAlu(0x8c25, {{0b0101, 0b0011, 0b0110}});
1040   // c.Or
1041   TestCMiscAlu(0x8c45, {{0b0101, 0b0011, 0b0111}});
1042   // c.And
1043   TestCMiscAlu(0x8c65, {{0b0101, 0b0011, 0b0001}});
1044   // c.SubW
1045   TestCMiscAlu(0x9c05, {{42, 23, 19}});
1046   // c.AddW
1047   TestCMiscAlu(0x9c25, {{19, 23, 42}});
1048 }
1049 
// Exercises c.Srli, c.Srai, c.Andi and c.Slli with every 6-bit immediate (0..63). The immediate
// bits are copied into a parcel skeleton and the opcode bits are ORed in per instruction.
TEST_F(TESTSUITE, CMiscAluImm) {
  union {
    uint8_t uimm;
    // Note: c.Andi uses sign-extended immediate while c.Srli/c.Srai need zero-extended one.
    // If we store the value into uimm and read from imm compiler would do correct conversion.
    int8_t imm : 6;
    // Same byte split into the two immediate fields used by the parcel below.
    struct [[gnu::packed]] {
      uint8_t i0_i4 : 5;
      uint8_t i5 : 1;
    } i_bits;
  };
  for (uimm = uint8_t{0}; uimm < uint8_t{64}; uimm++) {
    // Parcel skeleton: register and immediate fields only, all opcode fields are zero.
    union {
      int16_t parcel;
      struct [[gnu::packed]] {
        uint8_t low_opcode : 2;
        uint8_t i0_i4 : 5;
        uint8_t r : 3;
        uint8_t mid_opcode : 2;
        uint8_t i5 : 1;
        uint8_t high_opcode : 3;
      };
    } o_bits = {
        .low_opcode = 0,
        .i0_i4 = i_bits.i0_i4,
        .r = 1,
        .mid_opcode = 0,
        .i5 = i_bits.i5,
        .high_opcode = 0,
    };
    // The o_bits.parcel here doesn't include opcodes and we are adding it in the function call.
    // c.Srli
    TestCMiscAluImm(o_bits.parcel | 0b1000'0000'0000'0001,
                    0x8000'0000'0000'0000ULL,
                    0x8000'0000'0000'0000ULL >> uimm);
    // c.Srai
    // The expected value is built by shifting the complement and complementing the result back.
    TestCMiscAluImm(o_bits.parcel | 0b1000'0100'0000'0001,
                    0x8000'0000'0000'0000LL,
                    ~0 ^ ((0x8000'0000'0000'0000 ^ ~0) >>
                          uimm));  // Avoid shifting negative numbers to avoid UB
    // c.Andi
    // Reads the sign-extended 6-bit view (imm) of the value stored through uimm.
    TestCMiscAluImm(o_bits.parcel | 0b1000'1000'0000'0001,
                    0xffff'ffff'ffff'ffffULL,
                    0xffff'ffff'ffff'ffffULL & imm);

    // Previous instructions use 3-bit register encoding where 0b000 is r8, 0b001 is r9, etc.
    // c.Slli uses 5-bit register encoding. Since we want it to also work with r9 in the test body
    // we add 0b01000 to register bits to mimic that shift-by-8.
    // c.Slli                                   vvvvvv adds 8 to r to handle rd' vs rd difference.
    TestCMiscAluImm(o_bits.parcel | 0b0000'0100'0000'0010,
                    0x0000'0000'0000'0001ULL,
                    0x0000'0000'0000'0001ULL << uimm);
  }
}
1104 
// Exercises c.J over every even jump offset in -2048..2046. Bits 1..11 of the offset are
// scattered into the parcel and both parcel and offset are handed to TestCJ.
TEST_F(TESTSUITE, CJ) {
  // View of the offset that exposes bits 1..11 one by one (bit 0 is skipped).
  union {
    int16_t offset;
    struct [[gnu::packed]] {
      uint8_t : 1;
      uint8_t i1 : 1;
      uint8_t i2 : 1;
      uint8_t i3 : 1;
      uint8_t i4 : 1;
      uint8_t i5 : 1;
      uint8_t i6 : 1;
      uint8_t i7 : 1;
      uint8_t i8 : 1;
      uint8_t i9 : 1;
      uint8_t i10 : 1;
      uint8_t i11 : 1;
    } i_bits;
  };
  for (offset = int16_t{-2048}; offset < int16_t{2048}; offset += 2) {
    // Instruction parcel overlaid with its fields. The declaration order defines where each
    // offset bit lands inside the 16-bit parcel, so it must not be rearranged.
    union {
      int16_t parcel;
      struct [[gnu::packed]] {
        uint8_t low_opcode : 2;
        uint8_t i5 : 1;
        uint8_t i1 : 1;
        uint8_t i2 : 1;
        uint8_t i3 : 1;
        uint8_t i7 : 1;
        uint8_t i6 : 1;
        uint8_t i10 : 1;
        uint8_t i8 : 1;
        uint8_t i9 : 1;
        uint8_t i4 : 1;
        uint8_t i11 : 1;
        uint8_t high_opcode : 3;
      };
    } o_bits = {
        .low_opcode = 0b01,
        .i5 = i_bits.i5,
        .i1 = i_bits.i1,
        .i2 = i_bits.i2,
        .i3 = i_bits.i3,
        .i7 = i_bits.i7,
        .i6 = i_bits.i6,
        .i10 = i_bits.i10,
        .i8 = i_bits.i8,
        .i9 = i_bits.i9,
        .i4 = i_bits.i4,
        .i11 = i_bits.i11,
        .high_opcode = 0b101,
    };
    TestCJ(o_bits.parcel, offset);
  }
}
1159 
1160 TEST_F(TESTSUITE, CJalr) {
1161   // C.Jr
1162   TestJumpAndLinkRegister<0>(0x8102, 42, 42);
1163   // C.Mv
1164   TestCOp(0x808a, {{0, 1, 1}});
1165   // C.Jalr
1166   TestJumpAndLinkRegister<2>(0x9102, 42, 42);
1167   // C.Add
1168   TestCOp(0x908a, {{1, 2, 3}});
1169 }
1170 
1171 // Tests for Non-Compressed Instructions.
1172 
1173 TEST_F(TESTSUITE, CsrInstructions) {
1174   ScopedRoundingMode scoped_rounding_mode;
1175   // Csrrw x2, frm, 2
1176   TestFrm(0x00215173, 0, 2);
1177   // Csrrsi x2, frm, 2
1178   TestFrm(0x00216173, 0, 3);
1179   // Csrrci x2, frm, 1
1180   TestFrm(0x0020f173, 0, 0);
1181 }
1182 
// Union of the five guest floating-point exception flags (NX, UF, OF, DZ, NV). The FFlags tests
// use it as the inclusive upper bound when iterating over flag combinations.
constexpr uint8_t kFPFlagsAll = FPFlags::NX | FPFlags::UF | FPFlags::OF | FPFlags::DZ | FPFlags::NV;
// Ensure all trailing bits are set in kFPFlagsAll so that all combinations are possible.
// The complement has exactly five trailing zero bits only when bits 0..4 are set and bit 5 is not.
static_assert(__builtin_ctz(~kFPFlagsAll) == 5);
1186 
1187 // Automatically saves and restores fenv throughout the lifetime of a parent scope.
1188 class ScopedFenv {
1189  public:
1190   ScopedFenv() { EXPECT_EQ(fegetenv(&env_), 0); }
1191   ~ScopedFenv() { EXPECT_EQ(fesetenv(&env_), 0); }
1192 
1193  private:
1194   fenv_t env_;
1195 };
1196 
1197 TEST_F(TESTSUITE, FFlagsRead) {
1198   ScopedFenv fenv;
1199   for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) {
1200     RaiseFeExceptForGuestFlags(fflags);
1201     RunInstruction(0x00102173);  // frflags x2
1202     EXPECT_EQ(GetXReg<2>(state_.cpu), fflags);
1203   }
1204 }
1205 
1206 TEST_F(TESTSUITE, FFlagsSwap) {
1207   ScopedFenv fenv;
1208   for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) {
1209     RaiseFeExceptForGuestFlags(fflags);
1210     // After swapping in 0 for flags, read fflags to verify.
1211     SetXReg<3>(state_.cpu, 0);
1212     RunInstruction(0x00119173);  // fsflags x2, x3
1213     EXPECT_EQ(GetXReg<2>(state_.cpu), fflags);
1214     TestFFlagsOnGuestAndHost(0u);
1215   }
1216 }
1217 
1218 TEST_F(TESTSUITE, FFlagsSwapImmediate) {
1219   ScopedFenv fenv;
1220   for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) {
1221     RaiseFeExceptForGuestFlags(fflags);
1222     // After swapping in 0 for flags, read fflags to verify.
1223     RunInstruction(0x00105173);  // fsflags x2, 0
1224     EXPECT_EQ(GetXReg<2>(state_.cpu), fflags);
1225     TestFFlagsOnGuestAndHost(0u);
1226   }
1227 }
1228 
1229 TEST_F(TESTSUITE, FFlagsWrite) {
1230   ScopedFenv fenv;
1231   for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) {
1232     SetXReg<3>(state_.cpu, fflags);
1233     RunInstruction(0x00119073);  // fsflags x3
1234     TestFFlagsOnGuestAndHost(fflags);
1235   }
1236 }
1237 
1238 TEST_F(TESTSUITE, FFlagsWriteImmediate) {
1239   ScopedFenv fenv;
1240   for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) {
1241     RunInstruction(0x00105073 | fflags << 15);  // fsflagsi 0 (+ fflags)
1242     TestFFlagsOnGuestAndHost(fflags);
1243   }
1244 }
1245 
1246 TEST_F(TESTSUITE, FFlagsClearBits) {
1247   ScopedFenv fenv;
1248   for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) {
1249     RaiseFeExceptForGuestFlags(kFPFlagsAll);
1250     SetXReg<3>(state_.cpu, fflags);
1251     RunInstruction(0x0011b073);  // csrc fflags, x3
1252     // Read fflags to verify previous bitwise clear operation.
1253     TestFFlagsOnGuestAndHost(static_cast<uint8_t>(~fflags & kFPFlagsAll));
1254   }
1255 }
1256 
1257 TEST_F(TESTSUITE, FFlagsClearBitsImmediate) {
1258   ScopedFenv fenv;
1259   for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) {
1260     RaiseFeExceptForGuestFlags(kFPFlagsAll);
1261     RunInstruction(0x00107073 | fflags << 15);  // csrci fflags, 0 (+ fflags)
1262     // Read fflags to verify previous bitwise clear operation.
1263     TestFFlagsOnGuestAndHost(static_cast<uint8_t>(~fflags & kFPFlagsAll));
1264   }
1265 }
1266 
1267 TEST_F(TESTSUITE, FCsrRegister) {
1268   ScopedFenv fenv;
1269   for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) {
1270     RaiseFeExceptForGuestFlags(fflags);
1271 
1272     // Read and verify fflags, then replace with all flags.
1273     TestFCsr(0x00319173 /* fscsr x2,x3 */, fflags, fflags, 0);
1274 
1275     // Only read fcsr and verify fflags.
1276     TestFCsr(0x00302173 /* frcsr x2 */, /* ignored */ 0, fflags, /* expected_frm= */ 0b100u);
1277   }
1278 
1279   for (bool immediate_source : {true, false}) {
1280     for (uint8_t fflags = 0; fflags <= kFPFlagsAll; fflags++) {
1281       EXPECT_EQ(feclearexcept(FE_ALL_EXCEPT), 0);
1282       if (immediate_source) {
1283         TestFCsr(0x00305173 /* csrrwi x2,fcsr,0 */ | (fflags << 15), 0, 0, 0);
1284       } else {
1285         TestFCsr(0x00319173 /* fscsr x2,x3 */, 0b100'0000 | fflags, 0, /* expected_frm= */ 0b010u);
1286       }
1287       TestFFlagsOnGuestAndHost(fflags);
1288     }
1289   }
1290 }
1291 
1292 TEST_F(TESTSUITE, FsrRegister) {
1293   ScopedRoundingMode scoped_rounding_mode;
1294   int rounding[][2] = {{0, FE_TONEAREST},
1295                        {1, FE_TOWARDZERO},
1296                        {2, FE_DOWNWARD},
1297                        {3, FE_UPWARD},
1298                        {4, FE_TOWARDZERO},
1299                        // Only low three bits must be affecting output (for forward compatibility).
1300                        {8, FE_TONEAREST},
1301                        {9, FE_TOWARDZERO},
1302                        {10, FE_DOWNWARD},
1303                        {11, FE_UPWARD},
1304                        {12, FE_TOWARDZERO}};
1305   for (bool immediate_source : {true, false}) {
1306     for (auto [guest_rounding, host_rounding] : rounding) {
1307       if (immediate_source) {
1308         TestFrm(0x00205173 | (guest_rounding << 15), 0, guest_rounding & 0b111);
1309       } else {
1310         TestFrm(0x00219173, guest_rounding, guest_rounding & 0b111);
1311       }
1312       EXPECT_EQ(std::fegetround(), host_rounding);
1313     }
1314   }
1315 }
1316 
// Register-register 64-bit integer instructions. Every TestOp call receives the 32-bit
// instruction encoding and a list of three-value tuples; the values are consistent with
// {first operand, second operand, expected result} for the named instruction.
TEST_F(TESTSUITE, OpInstructions) {
  // Add
  TestOp(0x003100b3, {{19, 23, 42}});
  // Sub
  TestOp(0x403100b3, {{42, 23, 19}});
  // And
  TestOp(0x003170b3, {{0b0101, 0b0011, 0b0001}});
  // Or
  TestOp(0x003160b3, {{0b0101, 0b0011, 0b0111}});
  // Xor
  TestOp(0x003140b3, {{0b0101, 0b0011, 0b0110}});
  // Sll
  TestOp(0x003110b3, {{0b1010, 3, 0b0101'0000}});
  // Srl
  TestOp(0x003150b3, {{0xf000'0000'0000'0000ULL, 12, 0x000f'0000'0000'0000ULL}});
  // Sra
  TestOp(0x403150b3, {{0xf000'0000'0000'0000ULL, 12, 0xffff'0000'0000'0000ULL}});
  // Slt
  // The last tuple treats ~0ULL as a negative value, hence "less than 0".
  TestOp(0x003120b3,
         {
             {19, 23, 1},
             {23, 19, 0},
             {~0ULL, 0, 1},
         });
  // Sltu
  // The last tuple treats ~0ULL as the largest unsigned value, hence not "less than 0".
  TestOp(0x003130b3,
         {
             {19, 23, 1},
             {23, 19, 0},
             {~0ULL, 0, 0},
         });
  // Mul
  TestOp(0x023100b3, {{0x9999'9999'9999'9999, 0x9999'9999'9999'9999, 0x0a3d'70a3'd70a'3d71}});
  // Mulh
  TestOp(0x23110b3, {{0x9999'9999'9999'9999, 0x9999'9999'9999'9999, 0x28f5'c28f'5c28'f5c3}});
  // Mulhsu
  TestOp(0x23120b3, {{0x9999'9999'9999'9999, 0x9999'9999'9999'9999, 0xc28f'5c28'f5c2'8f5c}});
  // Mulhu
  TestOp(0x23130b3, {{0x9999'9999'9999'9999, 0x9999'9999'9999'9999, 0x5c28'f5c2'8f5c'28f5}});
  // Div
  // Besides ordinary quotients the vectors cover division by zero (expected -1), -2^31 / -1
  // (fits into 64 bits) and 0x8000'0000'0000'0000 / -1 (expected result equals the dividend).
  TestOp(0x23140b3, {{0x9999'9999'9999'9999, 0x3333, 0xfffd'fffd'fffd'fffe}});
  TestOp(0x23140b3, {{42, 2, 21}});
  TestOp(0x23140b3, {{42, 0, -1}});
  TestOp(0x23140b3, {{-2147483648, -1, 2147483648}});
  TestOp(0x23140b3, {{0x8000'0000'0000'0000, -1, 0x8000'0000'0000'0000}});

  // Divu
  TestOp(0x23150b3, {{0x9999'9999'9999'9999, 0x3333, 0x0003'0003'0003'0003}});
  // Rem
  TestOp(0x23160b3, {{0x9999'9999'9999'9999, 0x3333, 0xffff'ffff'ffff'ffff}});
  // Remu
  TestOp(0x23170b3, {{0x9999'9999'9999'9999, 0x3333, 0}});
  // Andn
  TestOp(0x403170b3, {{0b0101, 0b0011, 0b0100}});
  // Orn
  TestOp(0x403160b3, {{0b0101, 0b0011, 0xffff'ffff'ffff'fffd}});
  // Xnor
  TestOp(0x403140b3, {{0b0101, 0b0011, 0xffff'ffff'ffff'fff9}});
  // Max
  TestOp(0x0a3160b3, {{bit_cast<uint64_t>(int64_t{-5}), 4, 4}});
  TestOp(0x0a3160b3,
         {{bit_cast<uint64_t>(int64_t{-5}),
           bit_cast<uint64_t>(int64_t{-10}),
           bit_cast<uint64_t>(int64_t{-5})}});
  // Maxu
  TestOp(0x0a3170b3, {{50, 1, 50}});
  // Min
  TestOp(0x0a3140b3, {{bit_cast<uint64_t>(int64_t{-5}), 4, bit_cast<uint64_t>(int64_t{-5})}});
  TestOp(0x0a3140b3,
         {{bit_cast<uint64_t>(int64_t{-5}),
           bit_cast<uint64_t>(int64_t{-10}),
           bit_cast<uint64_t>(int64_t{-10})}});
  // Minu
  TestOp(0x0a3150b3, {{50, 1, 1}});

  // Ror
  TestOp(0x603150b3, {{0xf000'0000'0000'000fULL, 4, 0xff00'0000'0000'0000ULL}});
  TestOp(0x603150b3, {{0xf000'0000'0000'000fULL, 8, 0x0ff0'0000'0000'0000ULL}});
  // Rol
  TestOp(0x603110b3, {{0xff00'0000'0000'0000ULL, 4, 0xf000'0000'0000'000fULL}});
  TestOp(0x603110b3, {{0x000f'ff00'0000'000fULL, 8, 0x0fff'0000'0000'0f00ULL}});
  // Sh1add
  TestOp(0x203120b3, {{0x0008'0000'0000'0001, 0x1001'0001'0000'0000ULL, 0x1011'0001'0000'0002ULL}});
  // Sh2add
  TestOp(0x203140b3, {{0x0008'0000'0000'0001, 0x0001'0001'0000'0000ULL, 0x0021'0001'0000'0004ULL}});
  // Sh3add
  TestOp(0x203160b3, {{0x0008'0000'0000'0001, 0x1001'0011'0000'0000ULL, 0x1041'0011'0000'0008ULL}});
  // Bclr
  TestOp(0x483110b3, {{0b1000'0001'0000'0001ULL, 0, 0b1000'0001'0000'0000ULL}});
  TestOp(0x483110b3, {{0b1000'0001'0000'0001ULL, 8, 0b1000'0000'0000'0001ULL}});
  // Bext
  TestOp(0x483150b3, {{0b1000'0001'0000'0001ULL, 0, 0b0000'0000'0000'0001ULL}});
  TestOp(0x483150b3, {{0b1000'0001'0000'0001ULL, 8, 0b0000'0000'0000'0001ULL}});
  TestOp(0x483150b3, {{0b1000'0001'0000'0001ULL, 7, 0b0000'0000'0000'0000ULL}});
  // Binv
  TestOp(0x683110b3, {{0b1000'0001'0000'0001ULL, 0, 0b1000'0001'0000'0000ULL}});
  TestOp(0x683110b3, {{0b1000'0001'0000'0001ULL, 1, 0b1000'0001'0000'0011ULL}});
  // Bset
  TestOp(0x283110b3, {{0b1000'0001'0000'0001ULL, 0, 0b1000'0001'0000'0001ULL}});
  TestOp(0x283110b3, {{0b1000'0001'0000'0001ULL, 1, 0b1000'0001'0000'0011ULL}});
}
1418 
// Register-register 32-bit ("W" and ".uw") integer instructions. Every TestOp call receives the
// 32-bit instruction encoding and a list of three-value tuples; the values are consistent with
// {first operand, second operand, expected 64-bit result} for the named instruction.
TEST_F(TESTSUITE, Op32Instructions) {
  // Addw
  // The second tuple expects a result with bit 31 set to be sign-extended to 64 bits.
  TestOp(0x003100bb, {{19, 23, 42}, {0x8000'0000, 0, 0xffff'ffff'8000'0000}});
  // Add.uw
  TestOp(0x083100bb, {{19, 23, 42}, {0x8000'0000'8000'0000, 1, 0x0000'0000'8000'0001}});
  // Subw
  TestOp(0x403100bb, {{42, 23, 19}, {0x8000'0000, 0, 0xffff'ffff'8000'0000}});
  // Sllw
  TestOp(0x003110bb, {{0b1010, 3, 0b1010'000}});
  // Srlw
  TestOp(0x003150bb, {{0x0000'0000'f000'0000ULL, 12, 0x0000'0000'000f'0000ULL}});
  // Sraw
  TestOp(0x403150bb, {{0x0000'0000'f000'0000ULL, 12, 0xffff'ffff'ffff'0000ULL}});
  // Mulw
  TestOp(0x023100bb, {{0x9999'9999'9999'9999, 0x9999'9999'9999'9999, 0xffff'ffff'd70a'3d71}});
  // Divw
  // Besides an ordinary quotient the vectors cover division by zero (expected -1) and
  // -2^31 / -1 (expected result equals the dividend).
  TestOp(0x23140bb, {{0x9999'9999'9999'9999, 0x3333, 0xffff'ffff'fffd'fffe}});
  TestOp(0x23140bb, {{0x9999'9999'9999'9999, 0, -1}});
  TestOp(0x23140bb, {{-2147483648, -1, -2147483648}});

  // Divuw
  TestOp(0x23150bb,
         {{0x9999'9999'9999'9999, 0x3333, 0x0000'0000'0003'0003},
          {0xffff'ffff'8000'0000, 1, 0xffff'ffff'8000'0000}});
  // Remw
  TestOp(0x23160bb, {{0x9999'9999'9999'9999, 0x3333, 0xffff'ffff'ffff'ffff}});
  // Remuw
  TestOp(0x23170bb,
         {{0x9999'9999'9999'9999, 0x3333, 0},
          {0xffff'ffff'8000'0000, 0xffff'ffff'8000'0001, 0xffff'ffff'8000'0000}});
  // Zext.h
  TestOp(0x080140bb, {{0xffff'ffff'ffff'fffeULL, 0, 0xfffe}});
  // Rorw
  TestOp(0x603150bb, {{0x0000'0000'f000'000fULL, 4, 0xffff'ffff'ff00'0000}});
  TestOp(0x603150bb, {{0x0000'0000'f000'0000ULL, 4, 0x0000'0000'0f00'0000}});
  TestOp(0x603150bb, {{0x0000'0000'0f00'000fULL, 4, 0xffff'ffff'f0f0'0000}});
  // Rolw
  TestOp(0x603110bb, {{0x0000'0000'f000'000fULL, 4, 0x0000'0000'0000'00ff}});
  TestOp(0x603110bb, {{0x0000'0000'0ff0'0000ULL, 4, 0xffff'ffff'ff00'0000}});
  // Sh1add.uw
  TestOp(0x203120bb, {{0xf0ff'0000'8000'0001, 0x8000'0000, 0x0000'0001'8000'0002}});
  // Sh2add.uw
  TestOp(0x203140bb, {{0xf0ff'00ff'8000'0001, 0x8000'0000, 0x0000'0002'8000'0004}});
  // Sh3add.uw
  TestOp(0x203160bb, {{0xf0ff'0f00'8000'0001, 0x8000'0000, 0x0000'0004'8000'0008}});
}
1465 
// Register-immediate 64-bit integer instructions. Every TestOpImm call receives the 32-bit
// instruction encoding and a list of three-value tuples; the values are consistent with
// {register operand, immediate, expected result} for the named instruction. Single-operand
// instructions (Clz, Ctz, Cpop, Rev8, Sext.*, Orc.b) pass 0 as the second value.
TEST_F(TESTSUITE, OpImmInstructions) {
  // Addi
  TestOpImm(0x00010093, {{19, 23, 42}});
  // Slti
  TestOpImm(0x00012093,
            {
                {19, 23, 1},
                {23, 19, 0},
                {~0ULL, 0, 1},
            });
  // Sltiu
  TestOpImm(0x00013093,
            {
                {19, 23, 1},
                {23, 19, 0},
                {~0ULL, 0, 0},
            });
  // Xori
  TestOpImm(0x00014093, {{0b0101, 0b0011, 0b0110}});
  // Ori
  TestOpImm(0x00016093, {{0b0101, 0b0011, 0b0111}});
  // Andi
  TestOpImm(0x00017093, {{0b0101, 0b0011, 0b0001}});
  // Slli
  TestOpImm(0x00011093, {{0b1010, 3, 0b1010'000}});
  // Srli
  TestOpImm(0x00015093, {{0xf000'0000'0000'0000ULL, 12, 0x000f'0000'0000'0000ULL}});
  // Srai
  TestOpImm(0x40015093, {{0xf000'0000'0000'0000ULL, 12, 0xffff'0000'0000'0000ULL}});
  // Rori
  TestOpImm(0x60015093, {{0xf000'0000'0000'000fULL, 4, 0xff00'0000'0000'0000ULL}});
  // Clz
  TestOpImm(0x60011093, {{0, 0, 64}});
  TestOpImm(0x60011093, {{123, 0, 57}});
  // Ctz
  TestOpImm(0x60111093, {{0, 0, 64}});
  TestOpImm(0x60111093, {{0x01000000'0000, 0, 40}});
  // Cpop
  TestOpImm(0x60211093, {{0xf000'0000'0000'000fULL, 0, 8}});
  // Rev8
  TestOpImm(0x6b815093, {{0x0000'0000'0000'000fULL, 0, 0x0f00'0000'0000'0000ULL}});
  TestOpImm(0x6b815093, {{0xf000'0000'0000'0000ULL, 0, 0x0000'0000'0000'00f0ULL}});
  TestOpImm(0x6b815093, {{0x00f0'0000'0000'0000ULL, 0, 0x0000'0000'0000'f000ULL}});
  TestOpImm(0x6b815093, {{0x0000'000f'0000'0000ULL, 0, 0x0000'0000'0f00'0000ULL}});

  // Sext.b
  TestOpImm(0x60411093, {{0b1111'1110, 0, 0xffff'ffff'ffff'fffe}});  // -2
  // Sext.h
  TestOpImm(0x60511093, {{0b1111'1110, 0, 0xfe}});
  TestOpImm(0x60511093, {{0b1111'1111'1111'1110, 0, 0xffff'ffff'ffff'fffe}});
  // Orc.b
  TestOpImm(0x28715093, {{0xfe00'f0ff'fa00'fffb, 0, 0xff00'ffff'ff00'ffff}});
  TestOpImm(0x28715093, {{0xfa00, 0, 0xff00}});
  // Bclri
  TestOpImm(0x48011093, {{0b1000'0001'0000'0001ULL, 0, 0b1000'0001'0000'0000ULL}});
  TestOpImm(0x48011093, {{0b1000'0001'0000'0001ULL, 8, 0b1000'0000'0000'0001ULL}});
  // Bexti
  TestOpImm(0x48015093, {{0b1000'0001'0000'0001ULL, 0, 0b0000'0000'0000'0001ULL}});
  TestOpImm(0x48015093, {{0b1000'0001'0000'0001ULL, 8, 0b0000'0000'0000'0001ULL}});
  TestOpImm(0x48015093, {{0b1000'0001'0000'0001ULL, 7, 0b0000'0000'0000'0000ULL}});
  // Binvi
  TestOpImm(0x68011093, {{0b1000'0001'0000'0001ULL, 0, 0b1000'0001'0000'0000ULL}});
  TestOpImm(0x68011093, {{0b1000'0001'0000'0001ULL, 1, 0b1000'0001'0000'0011ULL}});
  // Bseti
  TestOpImm(0x28011093, {{0b1000'0001'0000'0001ULL, 0, 0b1000'0001'0000'0001ULL}});
  TestOpImm(0x28011093, {{0b1000'0001'0000'0001ULL, 1, 0b1000'0001'0000'0011ULL}});
}
1533 
// Register-immediate 32-bit ("W" and ".uw") integer instructions. Every TestOpImm call receives
// the 32-bit instruction encoding and a list of three-value tuples; the values are consistent
// with {register operand, immediate, expected 64-bit result} for the named instruction.
TEST_F(TESTSUITE, OpImm32Instructions) {
  // Addiw
  // The second tuple expects a result with bit 31 set to be sign-extended to 64 bits.
  TestOpImm(0x0001009b, {{19, 23, 42}, {0x8000'0000, 0, 0xffff'ffff'8000'0000}});
  // Slliw
  TestOpImm(0x0001109b, {{0b1010, 3, 0b1010'000}});
  // Srliw
  TestOpImm(0x0001509b, {{0x0000'0000'f000'0000ULL, 12, 0x0000'0000'000f'0000ULL}});
  // Sraiw
  TestOpImm(0x4001509b, {{0x0000'0000'f000'0000ULL, 12, 0xffff'ffff'ffff'0000ULL}});
  // Roriw
  TestOpImm(0x6001509b, {{0x0000'0000'f000'000fULL, 4, 0xffff'ffff'ff00'0000}});
  TestOpImm(0x6001509b, {{0x0000'0000'f000'0000ULL, 4, 0x0000'0000'0f00'0000}});
  TestOpImm(0x6001509b, {{0x0000'0000'0f00'000fULL, 4, 0xffff'ffff'f0f0'0000}});
  // Clzw
  TestOpImm(0x6001109b, {{0, 0, 32}});
  TestOpImm(0x6001109b, {{123, 0, 25}});
  // Ctzw
  TestOpImm(0x6011109b, {{0, 0, 32}});
  TestOpImm(0x6011109b, {{0x0000'0000'0000'0010, 0, 4}});
  // Cpopw
  // Only the set bits of the low 32 bits are expected to be counted.
  TestOpImm(0x6021109b, {{0xf000'0000'0000'000f, 0, 4}});
  // Slli.uw
  TestOpImm(0x0801109b, {{0x0000'0000'f000'000fULL, 4, 0x0000'000f'0000'00f0}});
}
1558 
1559 TEST_F(TESTSUITE, OpFpInstructions) {
1560   // FAdd.S
1561   TestOpFp(0x003100d3, {std::tuple{1.0f, 2.0f, 3.0f}});
1562   // FAdd.D
1563   TestOpFp(0x023100d3, {std::tuple{1.0, 2.0, 3.0}});
1564   // FSub.S
1565   TestOpFp(0x083100d3, {std::tuple{3.0f, 2.0f, 1.0f}});
1566   // FSub.D
1567   TestOpFp(0x0a3100d3, {std::tuple{3.0, 2.0, 1.0}});
1568   // FMul.S
1569   TestOpFp(0x103100d3, {std::tuple{3.0f, 2.0f, 6.0f}});
1570   // FMul.D
1571   TestOpFp(0x123100d3, {std::tuple{3.0, 2.0, 6.0}});
1572   // FDiv.S
1573   TestOpFp(0x183100d3, {std::tuple{6.0f, 2.0f, 3.0f}});
1574   // FDiv.D
1575   TestOpFp(0x1a3100d3, {std::tuple{6.0, 2.0, 3.0}});
1576   // FSgnj.S
1577   TestOpFp(0x203100d3,
1578            {std::tuple{1.0f, 2.0f, 1.0f},
1579             {-1.0f, 2.0f, 1.0f},
1580             {1.0f, -2.0f, -1.0f},
1581             {-1.0f, -2.0f, -1.0f}});
1582   // FSgnj.D
1583   TestOpFp(0x223100d3,
1584            {
1585                std::tuple{1.0, 2.0, 1.0},
1586                {-1.0, 2.0, 1.0},
1587                {1.0, -2.0, -1.0},
1588                {-1.0, -2.0, -1.0},
1589            });
1590   // FSgnjn.S
1591   TestOpFp(0x203110d3,
1592            {
1593                std::tuple{1.0f, 2.0f, -1.0f},
1594                {1.0f, 2.0f, -1.0f},
1595                {1.0f, -2.0f, 1.0f},
1596                {-1.0f, -2.0f, 1.0f},
1597            });
1598   // FSgnjn.D
1599   TestOpFp(0x223110d3,
1600            {
1601                std::tuple{1.0, 2.0, -1.0},
1602                {1.0, 2.0, -1.0},
1603                {1.0, -2.0, 1.0},
1604                {-1.0, -2.0, 1.0},
1605            });
1606   // FSgnjx.S
1607   TestOpFp(0x203120d3,
1608            {
1609                std::tuple{1.0f, 2.0f, 1.0f},
1610                {-1.0f, 2.0f, -1.0f},
1611                {1.0f, -2.0f, -1.0f},
1612                {-1.0f, -2.0f, 1.0f},
1613            });
1614   // FSgnjx.D
1615   TestOpFp(0x223120d3,
1616            {
1617                std::tuple{1.0, 2.0, 1.0},
1618                {-1.0, 2.0, -1.0},
1619                {1.0, -2.0, -1.0},
1620                {-1.0, -2.0, 1.0},
1621            });
1622   // FMin.S
1623   TestOpFp(0x283100d3,
1624            {std::tuple{+0.f, +0.f, +0.f},
1625             {+0.f, -0.f, -0.f},
1626             {-0.f, +0.f, -0.f},
1627             {-0.f, -0.f, -0.f},
1628             {+0.f, 1.f, +0.f},
1629             {-0.f, 1.f, -0.f}});
1630   // FMin.D
1631   TestOpFp(0x2a3100d3,
1632            {std::tuple{+0.0, +0.0, +0.0},
1633             {+0.0, -0.0, -0.0},
1634             {-0.0, +0.0, -0.0},
1635             {-0.0, -0.0, -0.0},
1636             {+0.0, 1.0, +0.0},
1637             {-0.0, 1.0, -0.0}});
1638   // FMax.S
1639   TestOpFp(0x283110d3,
1640            {std::tuple{+0.f, +0.f, +0.f},
1641             {+0.f, -0.f, +0.f},
1642             {-0.f, +0.f, +0.f},
1643             {-0.f, -0.f, -0.f},
1644             {+0.f, 1.f, 1.f},
1645             {-0.f, 1.f, 1.f}});
1646   // FMax.D
1647   TestOpFp(0x2a3110d3,
1648            {std::tuple{+0.0, +0.0, +0.0},
1649             {+0.0, -0.0, +0.0},
1650             {-0.0, +0.0, +0.0},
1651             {-0.0, -0.0, -0.0},
1652             {+0.0, 1.0, 1.0},
1653             {-0.0, 1.0, 1.0}});
1654 }
1655 
1656 TEST_F(TESTSUITE, UpperImmInstructions) {
1657   // Auipc
1658   TestAuipc(0xfedcb097, 0xffff'ffff'fedc'b000);
1659   // Lui
1660   TestLui(0xfedcb0b7, 0xffff'ffff'fedc'b000);
1661 }
1662 
// Conditional branches. Every TestBranch call receives the 32-bit instruction encoding and a
// list of {first operand, second operand, expected offset} tuples. The encodings (except the
// last one) carry a branch offset of 8, so an expected offset of 8 corresponds to a taken
// branch and 4 to falling through to the next instruction.
TEST_F(TESTSUITE, TestBranchInstructions) {
  // Beq
  TestBranch(0x00208463,
             {
                 {42, 42, 8},
                 {41, 42, 4},
                 {42, 41, 4},
             });
  // Bne
  TestBranch(0x00209463,
             {
                 {42, 42, 4},
                 {41, 42, 8},
                 {42, 41, 8},
             });
  // Bltu
  // 0xf000'0000'0000'0000 is compared as a large unsigned value here.
  TestBranch(0x0020e463,
             {
                 {41, 42, 8},
                 {42, 42, 4},
                 {42, 41, 4},
                 {0xf000'0000'0000'0000ULL, 42, 4},
                 {42, 0xf000'0000'0000'0000ULL, 8},
             });
  // Bgeu
  TestBranch(0x0020f463,
             {
                 {42, 41, 8},
                 {42, 42, 8},
                 {41, 42, 4},
                 {0xf000'0000'0000'0000ULL, 42, 8},
                 {42, 0xf000'0000'0000'0000ULL, 4},
             });
  // Blt
  // 0xf000'0000'0000'0000 is compared as a negative value here.
  TestBranch(0x0020c463,
             {
                 {41, 42, 8},
                 {42, 42, 4},
                 {42, 41, 4},
                 {0xf000'0000'0000'0000ULL, 42, 8},
                 {42, 0xf000'0000'0000'0000ULL, 4},
             });
  // Bge
  TestBranch(0x0020d463,
             {
                 {42, 41, 8},
                 {42, 42, 8},
                 {41, 42, 4},
                 {0xf000'0000'0000'0000ULL, 42, 4},
                 {42, 0xf000'0000'0000'0000ULL, 8},
             });
  // Beq with negative offset.
  TestBranch(0xfe208ee3,
             {
                 {42, 42, -4},
             });
}
1720 
1721 TEST_F(TESTSUITE, JumpAndLinkInstructions) {
1722   // Jal
1723   TestJumpAndLink(0x008000ef, 8);
1724   // Jal with negative offset.
1725   TestJumpAndLink(0xffdff0ef, -4);
1726 }
1727 
1728 TEST_F(TESTSUITE, JumpAndLinkWithReturnAddressRegisterAsTarget) {
1729   uint32_t insn_bytes{// jalr   ra
1730                       0x000080e7};
1731   auto code_start = ToGuestAddr(&insn_bytes);
1732   state_.cpu.insn_addr = code_start;
1733   // Translation cache requires upper bits to be zero.
1734   constexpr GuestAddr kJumpTargetAddr = 0x0000'f00d'cafe'b0baULL;
1735   SetXReg<RA>(state_.cpu, kJumpTargetAddr);
1736 
1737   EXPECT_TRUE(RunOneInstruction(&state_, kJumpTargetAddr));
1738   EXPECT_EQ(state_.cpu.insn_addr, kJumpTargetAddr);
1739   EXPECT_EQ(GetXReg<RA>(state_.cpu), code_start + 4);
1740 }
1741 
TEST_F(TESTSUITE, JumpAndLinkRegisterInstructions) {
  // Covers jalr with rd=ra (opcodes ending in 0xe7, template argument 4) and with rd=x0, i.e. jr
  // (opcodes ending in 0x67, template argument 0). All opcodes use x2 as the base register.
  // NOTE(review): the template argument is presumably the expected link-register offset from the
  // instruction address, and the two numeric arguments the base displacement and the expected
  // jump offset - confirm against TestJumpAndLinkRegister.
  // Jalr offset=4.
  TestJumpAndLinkRegister<4>(0x004100e7, 38, 42);
  // Jalr offset=-4.
  TestJumpAndLinkRegister<4>(0xffc100e7, 42, 38);
  // Jalr offset=5 - must properly align the target to even.
  TestJumpAndLinkRegister<4>(0x005100e7, 38, 42);
  // Jr offset=4.
  TestJumpAndLinkRegister<0>(0x00410067, 38, 42);
  // Jr offset=-4.
  TestJumpAndLinkRegister<0>(0xffc10067, 42, 38);
  // Jr offset=5 - must properly align the target to even.
  TestJumpAndLinkRegister<0>(0x00510067, 38, 42);
}
1756 
TEST_F(TESTSUITE, LoadInstructions) {
  // Every opcode below encodes "l* x1, 8(x2)". The unsigned forms expect kDataToLoad truncated
  // to the access width; the signed forms expect the truncated value sign-extended to 64 bits.
  // Offset is always 8.
  // Lbu
  TestLoad(0x00814083, kDataToLoad & 0xffULL);
  // Lhu
  TestLoad(0x00815083, kDataToLoad & 0xffffULL);
  // Lwu
  TestLoad(0x00816083, kDataToLoad & 0xffff'ffffULL);
  // Ld
  TestLoad(0x00813083, kDataToLoad);
  // Lb
  TestLoad(0x00810083, int64_t{int8_t(kDataToLoad)});
  // Lh
  TestLoad(0x00811083, int64_t{int16_t(kDataToLoad)});
  // Lw
  TestLoad(0x00812083, int64_t{int32_t(kDataToLoad)});
}
1774 
TEST_F(TESTSUITE, StoreInstructions) {
  // Every opcode below encodes "s* x2, 8(x1)". The second argument is kDataToStore truncated to
  // the access width of the instruction under test.
  // Offset is always 8.
  // Sb
  TestStore(0x00208423, kDataToStore & 0xffULL);
  // Sh
  TestStore(0x00209423, kDataToStore & 0xffffULL);
  // Sw
  TestStore(0x0020a423, kDataToStore & 0xffff'ffffULL);
  // Sd
  TestStore(0x0020b423, kDataToStore);
}
TEST_F(TESTSUITE, FmaInstructions) {
  // Fused multiply-add family, single (.S) and double (.D) precision. All opcodes encode
  // "f1 = op(f2, f3, f4)" with the dynamic rounding mode. Each tuple appears to be
  // {rs1, rs2, rs3, expected}; with inputs 1, 2, 3 the four operations give
  // 1*2+3 = 5, 1*2-3 = -1, -(1*2)+3 = 1 and -(1*2)-3 = -5.
  // Fmadd.S
  TestFma(0x203170c3, {std::tuple{1.0f, 2.0f, 3.0f, 5.0f}});
  // Fmadd.D
  TestFma(0x223170c3, {std::tuple{1.0, 2.0, 3.0, 5.0}});
  // Fmsub.S
  TestFma(0x203170c7, {std::tuple{1.0f, 2.0f, 3.0f, -1.0f}});
  // Fmsub.D
  TestFma(0x223170c7, {std::tuple{1.0, 2.0, 3.0, -1.0}});
  // Fnmsub.S
  TestFma(0x203170cb, {std::tuple{1.0f, 2.0f, 3.0f, 1.0f}});
  // Fnmsub.D
  TestFma(0x223170cb, {std::tuple{1.0, 2.0, 3.0, 1.0}});
  // Fnmadd.S
  TestFma(0x203170cf, {std::tuple{1.0f, 2.0f, 3.0f, -5.0f}});
  // Fnmadd.D
  TestFma(0x223170cf, {std::tuple{1.0, 2.0, 3.0, -5.0}});
}
1804 
1805 #if (defined(TESTING_INTERPRETER) || defined(TESTING_HEAVY_OPTIMIZER))
1806 
1807 TEST_F(TESTSUITE, AtomicLoadInstructions) {
1808   // Validate sign-extension of returned value.
1809   const uint64_t kNegative32BitValue = 0x0000'0000'8000'0000ULL;
1810   const uint64_t kSignExtendedNegative = 0xffff'ffff'8000'0000ULL;
1811   const uint64_t kPositive32BitValue = 0xffff'ffff'0000'0000ULL;
1812   const uint64_t kSignExtendedPositive = 0ULL;
1813   static_assert(static_cast<int32_t>(kSignExtendedPositive) >= 0);
1814   static_assert(static_cast<int32_t>(kSignExtendedNegative) < 0);
1815 
1816   // Lrw - sign extends from 32 to 64.
1817   TestAtomicLoad(0x1000a12f, &kPositive32BitValue, kSignExtendedPositive);
1818   TestAtomicLoad(0x1000a12f, &kNegative32BitValue, kSignExtendedNegative);
1819 
1820   // Lrd
1821   TestAtomicLoad(0x1000b12f, &kDataToLoad, kDataToLoad);
1822 }
1823 
1824 TEST_F(TESTSUITE, AtomicStoreInstructions) {
1825   // Scw
1826   TestAtomicStore(0x1820a1af, static_cast<uint32_t>(kDataToStore));
1827 
1828   // Scd
1829   TestAtomicStore(0x1820b1af, kDataToStore);
1830 }
1831 
1832 TEST_F(TESTSUITE, AtomicStoreInstructionNoLoadFailure) {
1833   // Scw
1834   TestAtomicStoreNoLoadFailure(0x1820a1af);
1835 
1836   // Scd
1837   TestAtomicStoreNoLoadFailure(0x1820b1af);
1838 }
1839 
1840 TEST_F(TESTSUITE, AtomicStoreInstructionDifferentLoadFailure) {
1841   // Scw
1842   TestAtomicStoreDifferentLoadFailure(0x1820a1af);
1843 
1844   // Scd
1845   TestAtomicStoreDifferentLoadFailure(0x1820b1af);
1846 }
1847 
1848 #endif  // (defined(TESTING_INTERPRETER) || defined(TESTING_HEAVY_OPTIMIZER))
1849 
TEST_F(TESTSUITE, AmoInstructions) {
  // Each call passes the 32-bit (.W) and 64-bit (.D) encodings of the same AMO, both of the form
  // "amo* x1, x3, (x2)", together with a 64-bit value.
  // NOTE(review): the third argument is presumably the expected result for inputs fixed inside
  // TestAmo - confirm against the helper.
  //
  // Verifying that all aq and rl combinations work for Amoswap, but only test relaxed one for most
  // other instructions for brevity.

  // AmoswapW/AmoswapD
  TestAmo(0x083120af, 0x083130af, 0xaaaa'bbbb'cccc'ddddULL);

  // AmoswapWAq/AmoswapDAq
  TestAmo(0x0c3120af, 0x0c3130af, 0xaaaa'bbbb'cccc'ddddULL);

  // AmoswapWRl/AmoswapDRl
  TestAmo(0x0a3120af, 0x0a3130af, 0xaaaa'bbbb'cccc'ddddULL);

  // AmoswapWAqrl/AmoswapDAqrl
  TestAmo(0x0e3120af, 0x0e3130af, 0xaaaa'bbbb'cccc'ddddULL);

  // AmoaddW/AmoaddD
  TestAmo(0x003120af, 0x003130af, 0xaaaa'aaaa'aaaa'aaa9);

  // AmoxorW/AmoxorD
  TestAmo(0x203120af, 0x203130af, 0x5555'5555'1111'1111);

  // AmoandW/AmoandD
  TestAmo(0x603120af, 0x603130af, 0xaaaa'aaaa'cccc'cccc);

  // AmoorW/AmoorD
  TestAmo(0x403120af, 0x403130af, 0xffff'ffff'dddd'dddd);

  // AmominW/AmominD
  TestAmo(0x803120af, 0x803130af, 0xaaaa'bbbb'cccc'ddddULL);

  // AmomaxW/AmomaxD
  TestAmo(0xa03120af, 0xa03130af, 0xffff'eeee'dddd'ccccULL);

  // AmominuW/AmominuD
  TestAmo(0xc03120af, 0xc03130af, 0xaaaa'bbbb'cccc'ddddULL);

  // AmomaxuW/AmomaxuD
  TestAmo(0xe03120af, 0xe03130af, 0xffff'eeee'dddd'ccccULL);
}
1890 
TEST_F(TESTSUITE, OpFpSingleInputInstructions) {
  // Square root in both precisions; each tuple appears to be {input, expected result}. Perfect
  // squares are used so the expected value is exact.
  // FSqrt.S
  TestOpFpSingleInput(0x580170d3, {std::tuple{4.0f, 2.0f}});
  // FSqrt.D
  TestOpFpSingleInput(0x5a0170d3, {std::tuple{16.0, 4.0}});
}
1897 
TEST_F(TESTSUITE, Fmv) {
  // Register moves between the integer and floating-point files, plus fp-to-fp moves. Each tuple
  // appears to be {source value, expected destination value}.
  // Fmv.X.W
  // The -1.0f case goes through int32_t/int64_t, so the expected value is the float's bit
  // pattern sign-extended to 64 bits.
  TestFmvFloatToInteger(0xe00080d3,
                        {std::tuple{1.0f, static_cast<uint64_t>(bit_cast<uint32_t>(1.0f))},
                         {-1.0f, static_cast<int64_t>(bit_cast<int32_t>(-1.0f))}});
  // Fmv.W.X
  TestFmvIntegerToFloat(
      0xf00080d3, {std::tuple{bit_cast<uint32_t>(1.0f), 1.0f}, {bit_cast<uint32_t>(-1.0f), -1.0f}});
  // Fmv.X.D
  TestFmvFloatToInteger(
      0xe20080d3, {std::tuple{1.0, bit_cast<uint64_t>(1.0)}, {-1.0, bit_cast<uint64_t>(-1.0)}});
  // Fmv.D.X
  TestFmvIntegerToFloat(
      0xf20080d3, {std::tuple{bit_cast<uint64_t>(1.0), 1.0}, {bit_cast<uint64_t>(-1.0), -1.0}});
  // Fmv.S
  TestOpFpSingleInput(0x202100d3, {std::tuple{1.0f, 1.0f}, {-1.0f, -1.0f}});
  // Fmv.D
  // Unlike the Fmv.S case, the input here is passed as a raw 64-bit pattern, not a double.
  TestOpFpSingleInput(0x222100d3,
                      {std::tuple{bit_cast<uint64_t>(1.0), 1.0}, {bit_cast<uint64_t>(-1.0), -1.0}});
}
1918 
// Register images of quiet NaNs of either sign, used by the Fabs/Fneg NaN tests.
// The float overload of kFPValueToFPReg sets the upper 32 bits of its 64-bit result (NaN-boxing);
// storing that result in a uint32_t discards those bits and keeps only the float bit pattern.
// NOTE(review): as a result SetFReg is handed single-precision inputs that are not NaN-boxed -
// confirm this is intended.
const uint32_t kPosNanFloat = kFPValueToFPReg(std::numeric_limits<float>::quiet_NaN());
const uint32_t kNegNanFloat = kFPValueToFPReg(-std::numeric_limits<float>::quiet_NaN());
const uint64_t kPosNanDouble = kFPValueToFPReg(std::numeric_limits<double>::quiet_NaN());
const uint64_t kNegNanDouble = kFPValueToFPReg(-std::numeric_limits<double>::quiet_NaN());
// Mask selecting the low 32 bits of a 64-bit register value.
constexpr uint64_t kMaskFloatBits = (uint64_t{1} << 32) - 1;
1924 
1925 TEST_F(TESTSUITE, FabsSinglePrecisionNanPosToPos) {
1926   SetFReg<2>(state_.cpu, kPosNanFloat);
1927   RunInstruction(0x202120d3);  // fabs.s f1, f2
1928   EXPECT_EQ(GetFReg<1>(state_.cpu) & kMaskFloatBits, kPosNanFloat);
1929 }
1930 
1931 TEST_F(TESTSUITE, FabsSinglePrecisionNanNegToPos) {
1932   SetFReg<2>(state_.cpu, kNegNanFloat);
1933   RunInstruction(0x202120d3);  // fabs.s f1, f2
1934   EXPECT_EQ(GetFReg<1>(state_.cpu) & kMaskFloatBits, kPosNanFloat);
1935 }
1936 
1937 TEST_F(TESTSUITE, FabsDoublePrecisionNanPosToPos) {
1938   SetFReg<2>(state_.cpu, kPosNanDouble);
1939   RunInstruction(0x222120d3);  // fabs.d f1, f2
1940   EXPECT_EQ(GetFReg<1>(state_.cpu), kPosNanDouble);
1941 }
1942 
1943 TEST_F(TESTSUITE, FabsDoublePrecisionNanNegToPos) {
1944   SetFReg<2>(state_.cpu, kNegNanDouble);
1945   RunInstruction(0x222120d3);  // fabs.d f1, f2
1946   EXPECT_EQ(GetFReg<1>(state_.cpu), kPosNanDouble);
1947 }
1948 
1949 TEST_F(TESTSUITE, FnegSinglePrecisionNanPosToNeg) {
1950   SetFReg<2>(state_.cpu, kPosNanFloat);
1951   RunInstruction(0x202110d3);  // fneg.s f1, f2
1952   EXPECT_EQ(GetFReg<1>(state_.cpu) & kMaskFloatBits, kNegNanFloat);
1953 }
1954 
1955 TEST_F(TESTSUITE, FnegSinglePrecisionNanNegToPos) {
1956   SetFReg<2>(state_.cpu, kNegNanFloat);
1957   RunInstruction(0x202110d3);  // fneg.s f1, f2
1958   EXPECT_EQ(GetFReg<1>(state_.cpu) & kMaskFloatBits, kPosNanFloat);
1959 }
1960 
1961 TEST_F(TESTSUITE, FnegDoublePrecisionNanPosToNeg) {
1962   SetFReg<2>(state_.cpu, kPosNanDouble);
1963   RunInstruction(0x222110d3);  // fneg.s f1, f2
1964   EXPECT_EQ(GetFReg<1>(state_.cpu), kNegNanDouble);
1965 }
1966 
1967 TEST_F(TESTSUITE, FnegDoublePrecisionNanNegToPos) {
1968   SetFReg<2>(state_.cpu, kNegNanDouble);
1969   RunInstruction(0x222110d3);  // fneg.s f1, f2
1970   EXPECT_EQ(GetFReg<1>(state_.cpu), kPosNanDouble);
1971 }
1972 
TEST_F(TESTSUITE, OpFpFcvt) {
  // Conversions between float, double and 32/64-bit signed/unsigned integers. Each tuple appears
  // to be {source value, expected result}. All inputs are small values that are exactly
  // representable in every format involved, so no rounding takes place.
  // Fcvt.S.D
  TestOpFpSingleInput(0x401170d3, {std::tuple{1.0, 1.0f}});
  // Fcvt.D.S
  TestOpFpSingleInput(0x420100d3, {std::tuple{2.0f, 2.0}});
  // Fcvt.W.S
  TestOpFpGpRegisterTargetSingleInput(0xc00170d3, {std::tuple{3.0f, 3UL}});
  // Fcvt.WU.S
  TestOpFpGpRegisterTargetSingleInput(0xc01170d3, {std::tuple{3.0f, 3UL}});
  // Fcvt.L.S
  TestOpFpGpRegisterTargetSingleInput(0xc02170d3, {std::tuple{3.0f, 3UL}});
  // Fcvt.LU.S
  TestOpFpGpRegisterTargetSingleInput(0xc03170d3, {std::tuple{3.0f, 3UL}});
  // Fcvt.W.D
  TestOpFpGpRegisterTargetSingleInput(0xc20170d3, {std::tuple{3.0, 3UL}});
  // Fcvt.WU.D
  TestOpFpGpRegisterTargetSingleInput(0xc21170d3, {std::tuple{3.0, 3UL}});
  // Fcvt.L.D
  TestOpFpGpRegisterTargetSingleInput(0xc22170d3, {std::tuple{3.0, 3UL}});
  // Fcvt.LU.D
  TestOpFpGpRegisterTargetSingleInput(0xc23170d3, {std::tuple{3.0, 3UL}});
  // Fcvt.S.W
  TestOpFpGpRegisterSourceSingleInput(0xd00170d3, {std::tuple{3UL, 3.0f}});
  // Fcvt.S.WU
  TestOpFpGpRegisterSourceSingleInput(0xd01170d3, {std::tuple{3UL, 3.0f}});
  // Fcvt.S.L
  TestOpFpGpRegisterSourceSingleInput(0xd02170d3, {std::tuple{3UL, 3.0f}});
  // Fcvt.S.LU
  TestOpFpGpRegisterSourceSingleInput(0xd03170d3, {std::tuple{3UL, 3.0f}});
  // Fcvt.D.W
  TestOpFpGpRegisterSourceSingleInput(0xd20170d3, {std::tuple{3UL, 3.0}});
  // Fcvt.D.WU
  TestOpFpGpRegisterSourceSingleInput(0xd21170d3, {std::tuple{3UL, 3.0}});
  // Fcvt.D.L
  TestOpFpGpRegisterSourceSingleInput(0xd22170d3, {std::tuple{3UL, 3.0}});
  // Fcvt.D.LU
  TestOpFpGpRegisterSourceSingleInput(0xd23170d3, {std::tuple{3UL, 3.0}});
}
2011 
TEST_F(TESTSUITE, OpFpGpRegisterTargetInstructions) {
  // Floating-point comparisons that write 0 or 1 to an integer register. Each tuple appears to be
  // {first operand, second operand, expected integer result}; every instruction is exercised with
  // a less-than pair, a greater-than pair and an equal pair.
  // Fle.S
  TestOpFpGpRegisterTarget(0xa03100d3,
                           {std::tuple{1.0f, 2.0f, 1UL}, {2.0f, 1.0f, 0UL}, {0.0f, 0.0f, 1UL}});
  // Fle.D
  TestOpFpGpRegisterTarget(0xa23100d3,
                           {std::tuple{1.0, 2.0, 1UL}, {2.0, 1.0, 0UL}, {0.0, 0.0, 1UL}});
  // Flt.S
  TestOpFpGpRegisterTarget(0xa03110d3,
                           {std::tuple{1.0f, 2.0f, 1UL}, {2.0f, 1.0f, 0UL}, {0.0f, 0.0f, 0UL}});
  // Flt.D
  TestOpFpGpRegisterTarget(0xa23110d3,
                           {std::tuple{1.0, 2.0, 1UL}, {2.0, 1.0, 0UL}, {0.0, 0.0, 0UL}});
  // Feq.S
  TestOpFpGpRegisterTarget(0xa03120d3,
                           {std::tuple{1.0f, 2.0f, 0UL}, {2.0f, 1.0f, 0UL}, {0.0f, 0.0f, 1UL}});
  // Feq.D
  TestOpFpGpRegisterTarget(0xa23120d3,
                           {std::tuple{1.0, 2.0, 0UL}, {2.0, 1.0, 0UL}, {0.0, 0.0, 1UL}});
}
2032 
TEST_F(TESTSUITE, TestOpFpGpRegisterTargetSingleInput) {
  // Fclass in both precisions. The expected value is a one-hot mask, one bit per class, listed
  // from bit 0 to bit 9: -inf, negative normal, negative subnormal, -0, +0, positive subnormal,
  // positive normal, +inf, signaling NaN, quiet NaN.
  // Fclass.S
  TestOpFpGpRegisterTargetSingleInput(
      0xe00110d3,
      {std::tuple{-std::numeric_limits<float>::infinity(), 0b00'0000'0001UL},
       {-1.0f, 0b00'0000'0010UL},
       {-std::numeric_limits<float>::denorm_min(), 0b00'0000'0100UL},
       {-0.0f, 0b00'0000'1000UL},
       {0.0f, 0b00'0001'0000UL},
       {std::numeric_limits<float>::denorm_min(), 0b00'0010'0000UL},
       {1.0f, 0b00'0100'0000UL},
       {std::numeric_limits<float>::infinity(), 0b00'1000'0000UL},
       {std::numeric_limits<float>::signaling_NaN(), 0b01'0000'0000UL},
       {std::numeric_limits<float>::quiet_NaN(), 0b10'0000'0000UL}});
  // Fclass.D
  TestOpFpGpRegisterTargetSingleInput(
      0xe20110d3,
      {std::tuple{-std::numeric_limits<double>::infinity(), 0b00'0000'0001UL},
       {-1.0, 0b00'0000'0010UL},
       {-std::numeric_limits<double>::denorm_min(), 0b00'0000'0100UL},
       {-0.0, 0b00'0000'1000UL},
       {0.0, 0b00'0001'0000UL},
       {std::numeric_limits<double>::denorm_min(), 0b00'0010'0000UL},
       {1.0, 0b00'0100'0000UL},
       {std::numeric_limits<double>::infinity(), 0b00'1000'0000UL},
       {std::numeric_limits<double>::signaling_NaN(), 0b01'0000'0000UL},
       {std::numeric_limits<double>::quiet_NaN(), 0b10'0000'0000UL}});
}
2061 
TEST_F(TESTSUITE, RoundingModeTest) {
  // Exercises the static rounding-mode field (funct3) of fadd.s/fadd.d; the opcodes differ only
  // in that field. Each tuple appears to be {first operand, second operand, expected sum}.
  // The second operand is 2^-24 for float and 2^-53 for double, i.e. half the spacing between
  // adjacent values just above 1.0, so every exact sum is a tie between two representable values
  // and the result shows which way the mode rounds.
  // FAdd.S, rm=000
  TestOpFp(0x003100d3,
           // Test RNE
           {std::tuple{1.0000001f, 0.000000059604645f, 1.0000002f},
            {1.0000002f, 0.000000059604645f, 1.0000002f},
            {1.0000004f, 0.000000059604645f, 1.0000005f},
            {-1.0000001f, -0.000000059604645f, -1.0000002f},
            {-1.0000002f, -0.000000059604645f, -1.0000002f},
            {-1.0000004f, -0.000000059604645f, -1.0000005f}});
  // FAdd.S, rm=001
  TestOpFp(0x003110d3,
           // Test RTZ
           {std::tuple{1.0000001f, 0.000000059604645f, 1.0000001f},
            {1.0000002f, 0.000000059604645f, 1.0000002f},
            {1.0000004f, 0.000000059604645f, 1.0000004f},
            {-1.0000001f, -0.000000059604645f, -1.0000001f},
            {-1.0000002f, -0.000000059604645f, -1.0000002f},
            {-1.0000004f, -0.000000059604645f, -1.0000004f}});
  // FAdd.S, rm=010
  TestOpFp(0x003120d3,
           // Test RDN
           {std::tuple{1.0000001f, 0.000000059604645f, 1.0000001f},
            {1.0000002f, 0.000000059604645f, 1.0000002f},
            {1.0000004f, 0.000000059604645f, 1.0000004f},
            {-1.0000001f, -0.000000059604645f, -1.0000002f},
            {-1.0000002f, -0.000000059604645f, -1.0000004f},
            {-1.0000004f, -0.000000059604645f, -1.0000005f}});
  // FAdd.S, rm=011
  TestOpFp(0x003130d3,
           // Test RUP
           {std::tuple{1.0000001f, 0.000000059604645f, 1.0000002f},
            {1.0000002f, 0.000000059604645f, 1.0000004f},
            {1.0000004f, 0.000000059604645f, 1.0000005f},
            {-1.0000001f, -0.000000059604645f, -1.0000001f},
            {-1.0000002f, -0.000000059604645f, -1.0000002f},
            {-1.0000004f, -0.000000059604645f, -1.0000004f}});
  // FAdd.S, rm=100
  TestOpFp(0x003140d3,
           // Test RMM
           {std::tuple{1.0000001f, 0.000000059604645f, 1.0000002f},
            {1.0000002f, 0.000000059604645f, 1.0000004f},
            {1.0000004f, 0.000000059604645f, 1.0000005f},
            {-1.0000001f, -0.000000059604645f, -1.0000002f},
            {-1.0000002f, -0.000000059604645f, -1.0000004f},
            {-1.0000004f, -0.000000059604645f, -1.0000005f}});

  // FAdd.D, rm=000
  TestOpFp(0x023100d3,
           // Test RNE
           {std::tuple{1.0000000000000002, 0.00000000000000011102230246251565, 1.0000000000000004},
            {1.0000000000000004, 0.00000000000000011102230246251565, 1.0000000000000004},
            {1.0000000000000007, 0.00000000000000011102230246251565, 1.0000000000000009},
            {-1.0000000000000002, -0.00000000000000011102230246251565, -1.0000000000000004},
            {-1.0000000000000004, -0.00000000000000011102230246251565, -1.0000000000000004},
            {-1.0000000000000007, -0.00000000000000011102230246251565, -1.0000000000000009}});
  // FAdd.D, rm=001
  TestOpFp(0x023110d3,
           // Test RTZ
           {std::tuple{1.0000000000000002, 0.00000000000000011102230246251565, 1.0000000000000002},
            {1.0000000000000004, 0.00000000000000011102230246251565, 1.0000000000000004},
            {1.0000000000000007, 0.00000000000000011102230246251565, 1.0000000000000007},
            {-1.0000000000000002, -0.00000000000000011102230246251565, -1.0000000000000002},
            {-1.0000000000000004, -0.00000000000000011102230246251565, -1.0000000000000004},
            {-1.0000000000000007, -0.00000000000000011102230246251565, -1.0000000000000007}});
  // FAdd.D, rm=010
  TestOpFp(0x023120d3,
           // Test RDN
           {std::tuple{1.0000000000000002, 0.00000000000000011102230246251565, 1.0000000000000002},
            {1.0000000000000004, 0.00000000000000011102230246251565, 1.0000000000000004},
            {1.0000000000000007, 0.00000000000000011102230246251565, 1.0000000000000007},
            {-1.0000000000000002, -0.00000000000000011102230246251565, -1.0000000000000004},
            {-1.0000000000000004, -0.00000000000000011102230246251565, -1.0000000000000007},
            {-1.0000000000000007, -0.00000000000000011102230246251565, -1.0000000000000009}});
  // FAdd.D, rm=011
  TestOpFp(0x023130d3,
           // Test RUP
           {std::tuple{1.0000000000000002, 0.00000000000000011102230246251565, 1.0000000000000004},
            {1.0000000000000004, 0.00000000000000011102230246251565, 1.0000000000000007},
            {1.0000000000000007, 0.00000000000000011102230246251565, 1.0000000000000009},
            {-1.0000000000000002, -0.00000000000000011102230246251565, -1.0000000000000002},
            {-1.0000000000000004, -0.00000000000000011102230246251565, -1.0000000000000004},
            {-1.0000000000000007, -0.00000000000000011102230246251565, -1.0000000000000007}});
  // FAdd.D, rm=100
  TestOpFp(0x023140d3,
           // Test RMM
           {std::tuple{1.0000000000000002, 0.00000000000000011102230246251565, 1.0000000000000004},
            {1.0000000000000004, 0.00000000000000011102230246251565, 1.0000000000000007},
            {1.0000000000000007, 0.00000000000000011102230246251565, 1.0000000000000009},
            {-1.0000000000000002, -0.00000000000000011102230246251565, -1.0000000000000004},
            {-1.0000000000000004, -0.00000000000000011102230246251565, -1.0000000000000007},
            {-1.0000000000000007, -0.00000000000000011102230246251565, -1.0000000000000009}});
}
2155 
TEST_F(TESTSUITE, LoadFpInstructions) {
  // Both opcodes encode "fl* f1, 8(x2)".
  // Offset is always 8.
  // Flw - the expected register value has all upper 32 bits set (NaN-boxed single).
  TestLoadFp(0x00812087, kDataToLoad | 0xffffffff00000000ULL);
  // Fld
  TestLoadFp(0x00813087, kDataToLoad);
}
2161 
TEST_F(TESTSUITE, StoreFpInstructions) {
  // Both opcodes encode "fs* f2, 8(x1)"; the second argument is kDataToStore truncated to the
  // access width.
  // Offset is always 8.
  // Fsw
  TestStoreFp(0x0020a427, kDataToStore & 0xffff'ffffULL);
  // Fsd
  TestStoreFp(0x0020b427, kDataToStore);
}
2169 
2170 #if defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR)
2171 
TEST_F(TESTSUITE, TestVsetvl) {
  // Table-driven check of vsetvl, vsetvli and vsetivli.
  // NOTE(review): judging by the data, each row is
  //   {initial vl, initial vtype, avl, requested vtype, expected vl, expected vtype}
  // - confirm against the TestVsetvl helper. The expected vl values are consistent with
  // VLEN = 128 bits.
  // vtype values are written in octal so that each digit lines up with a 3-bit vtype field:
  // the lowest digit is vlmul, the next one vsew, and the bits above those are vta/vma.
  //
  // Bit 63 set: the value expected in vtype after an unsupported configuration was requested.
  constexpr uint64_t kVill =
      0b1'0000000'00000000'00000000'00000000'00000000'00000000'00000000'00000000;
  // Vsetvl, rs1 != x0
  TestVsetvl(0x803170d7,
             {
                 // Valid combinations.
                 {~0ULL, ~0ULL, ~0ULL, 005, 2, 005},
                 {~0ULL, ~0ULL, ~0ULL, 006, 4, 006},
                 {~0ULL, ~0ULL, ~0ULL, 007, 8, 007},
                 {~0ULL, ~0ULL, ~0ULL, 000, 16, 000},
                 {~0ULL, ~0ULL, ~0ULL, 001, 32, 001},
                 {~0ULL, ~0ULL, ~0ULL, 002, 64, 002},
                 {~0ULL, ~0ULL, ~0ULL, 003, 128, 003},
                 {~0ULL, ~0ULL, ~0ULL, 015, 1, 015},
                 {~0ULL, ~0ULL, ~0ULL, 016, 2, 016},
                 {~0ULL, ~0ULL, ~0ULL, 017, 4, 017},
                 {~0ULL, ~0ULL, ~0ULL, 010, 8, 010},
                 {~0ULL, ~0ULL, ~0ULL, 011, 16, 011},
                 {~0ULL, ~0ULL, ~0ULL, 012, 32, 012},
                 {~0ULL, ~0ULL, ~0ULL, 013, 64, 013},
                 {~0ULL, ~0ULL, ~0ULL, 026, 1, 026},
                 {~0ULL, ~0ULL, ~0ULL, 027, 2, 027},
                 {~0ULL, ~0ULL, ~0ULL, 020, 4, 020},
                 {~0ULL, ~0ULL, ~0ULL, 021, 8, 021},
                 {~0ULL, ~0ULL, ~0ULL, 022, 16, 022},
                 {~0ULL, ~0ULL, ~0ULL, 023, 32, 023},
                 {~0ULL, ~0ULL, ~0ULL, 037, 1, 037},
                 {~0ULL, ~0ULL, ~0ULL, 030, 2, 030},
                 {~0ULL, ~0ULL, ~0ULL, 031, 4, 031},
                 {~0ULL, ~0ULL, ~0ULL, 032, 8, 032},
                 {~0ULL, ~0ULL, ~0ULL, 033, 16, 033},
                 // Invalid combinations.
                 {~0ULL, ~0ULL, ~0ULL, 004, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 014, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 024, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 025, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 034, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 035, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 036, 0, kVill},
                 // Invalid sizes.
                 {~0ULL, ~0ULL, ~0ULL, 040, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 041, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 042, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 043, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 044, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 045, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 046, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 047, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 050, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 051, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 052, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 053, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 054, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 055, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 056, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 057, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 060, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 061, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 062, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 063, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 064, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 065, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 066, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 067, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 070, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 071, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 072, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 073, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 074, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 075, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 076, 0, kVill},
                 {~0ULL, ~0ULL, ~0ULL, 077, 0, kVill},
                 // Vma/vta bits.
                 {~0ULL, ~0ULL, ~0ULL, 0100, 16, 0100},
                 {~0ULL, ~0ULL, ~0ULL, 0200, 16, 0200},
                 {~0ULL, ~0ULL, ~0ULL, 0300, 16, 0300},
                 // Extra bits ignored as permitted by RISC-V specification.
                 {~0ULL, ~0ULL, ~0ULL, 0400, 16, 0000},
                 {~0ULL, ~0ULL, ~0ULL, 0500, 16, 0100},
                 {~0ULL, ~0ULL, ~0ULL, 0600, 16, 0200},
                 {~0ULL, ~0ULL, ~0ULL, 0700, 16, 0300},
                 // Avl handling.
                 // With a maximum of 128 elements for vtype 003: avl up to the maximum is used
                 // as is, avl between one and two times the maximum is halved (rounded up), and
                 // anything larger is clamped to the maximum.
                 {~0ULL, ~0ULL, 67, 003, 67, 003},
                 {~0ULL, ~0ULL, 151, 003, 76, 003},
                 {~0ULL, ~0ULL, 256, 003, 128, 003},
                 {~0ULL, ~0ULL, 257, 003, 128, 003},
             });
  // vsetvl rs1 == x0, rd != x0
  TestVsetvl(0x803070d7, {{~0ULL, ~0ULL, 42, 000, 16, 000}});
  // vsetvl rs1 == x0, rd == x0
  TestVsetvl(0x80307057,
             {// Valid change of vtype.
              {9, 000, 128, 022, 9, 022},
              // Invalid change of vtype.
              {8, 001, 128, 022, 0, kVill}});
  // vsetvli rs1 != x0
  TestVsetvl(0x12170d7, {{~0ULL, ~0ULL, 128, 0, 16, 022}});
  // vsetvli rs1 == x0, rd != x0
  TestVsetvl(0x12070d7, {{~0ULL, ~0ULL, 42, 000, 16, 022}});
  // vsetvli, rs1 == x0, rd == x0
  TestVsetvl(0x1207057,
             {// Valid change of vtype.
              {9, 000, 128, ~0ULL, 9, 022},
              // Invalid change of vtype.
              {8, 001, 128, ~0ULL, 0, kVill}});
  // NOTE(review): vsetivli has no rs1 operand; instruction bits 19:15 hold a 5-bit immediate AVL.
  // "rs1 != x0" below therefore means uimm = 16 and "rs1 == x0" means uimm = 0. The RVV
  // specification gives AVL = uimm for vsetivli, without the x0 special cases of vsetvl/vsetvli,
  // so the uimm = 0 expectations below (vl = 16, and vl kept at 9) look like vsetvli semantics -
  // confirm whether they are intended.
  // vsetivli rs1 != x0
  TestVsetvl(0xc12870d7, {{~0ULL, ~0ULL, 128, 0, 16, 022}});
  // vsetivli rs1 == x0, rd != x0
  TestVsetvl(0xc12070d7, {{~0ULL, ~0ULL, 42, 000, 16, 022}});
  // vsetivli, rs1 == x0, rd == x0
  TestVsetvl(0xc1207057,
             {// Valid change of vtype.
              {9, 000, 128, ~0ULL, 9, 022},
              // Invalid change of vtype.
              {8, 001, 128, ~0ULL, 0, kVill}});
}
2289 
2290 #endif  // defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR)
2291