1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_
18 #define ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_
19 
#include <cstdint>
#include <cstring>
#include <limits>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

#include "arch/riscv64/instruction_set_features_riscv64.h"
#include "base/arena_containers.h"
#include "base/globals.h"
#include "base/macros.h"
#include "base/pointer_size.h"
#include "managed_register_riscv64.h"
#include "utils/assembler.h"
#include "utils/label.h"
33 
34 namespace art HIDDEN {
35 namespace riscv64 {
36 
37 class ScratchRegisterScope;
38 
// Sizes, in bytes, of the basic data units used by the RISC-V 64 assembler.
// Each unit is expressed as twice the previous one.
static constexpr size_t kRiscv64HalfwordSize = 2u;
static constexpr size_t kRiscv64WordSize = 2u * kRiscv64HalfwordSize;
static constexpr size_t kRiscv64DoublewordSize = 2u * kRiscv64WordSize;
// Size, in bytes, of a floating point register.
static constexpr size_t kRiscv64FloatRegSizeInBytes = 8u;
43 
// The `Riscv64Extension` enumeration is used for restricting the instructions that the assembler
// can use. There are two kinds of restrictions:
//  - some are checked only in debug mode (for example, load and store instructions check
//    `kLoadStore`);
//  - others are checked at run time and affect the emitted code (for example, the `SextW()`
//    pseudo-instruction selects between an implementation from "Zcb", "Zbb" and a
//    two-instruction sequence from the basic instruction set).
// Enumerators take consecutive values starting at 0; `Riscv64ExtensionBit()` uses the value
// as a bit index.
enum class Riscv64Extension : uint32_t {
  // Pseudo-extension encompassing all loads and stores. Used to check that
  // we do not have loads and stores in the middle of a LR/SC sequence.
  kLoadStore,

  kZifencei,
  kM,
  kA,
  kZicsr,
  kF,
  kD,
  kZba,
  kZbb,
  // TODO(riscv64): Implement "Zbs" instructions.
  kZbs,
  kV,

  // "C" extension instructions except floating point loads/stores.
  kZca,
  // "C" extension double loads/stores.
  // Note: RV64 cannot implement Zcf ("C" extension float loads/stores).
  kZcd,
  // Simple 16-bit operations not present in the original "C" extension.
  kZcb,

  // Alias of the last real enumerator; keep in sync when adding extensions.
  kLast = kZcb
};
69 
70 using Riscv64ExtensionMask = uint32_t;
71 
Riscv64ExtensionBit(Riscv64Extension ext)72 constexpr Riscv64ExtensionMask Riscv64ExtensionBit(Riscv64Extension ext) {
73   return 1u << enum_cast<>(ext);
74 }
75 
76 constexpr Riscv64ExtensionMask kRiscv64AllExtensionsMask =
77     MaxInt<Riscv64ExtensionMask>(enum_cast<>(Riscv64Extension::kLast) + 1);
78 
79 // Extensions allowed in a LR/SC sequence (between the LR and SC).
80 constexpr Riscv64ExtensionMask kRiscv64LrScSequenceExtensionsMask =
81     Riscv64ExtensionBit(Riscv64Extension::kZca);
82 
// Values for the rounding mode (RM) field of floating point instructions.
enum class FPRoundingMode : uint32_t {
  kRNE = 0b000,  // Round to Nearest, ties to Even
  kRTZ = 0b001,  // Round towards Zero
  kRDN = 0b010,  // Round Down (towards -Infinity)
  kRUP = 0b011,  // Round Up (towards +Infinity)
  kRMM = 0b100,  // Round to Nearest, ties to Max Magnitude
  kDYN = 0b111,  // Dynamic rounding mode

  // Mode passed by the helper overloads that do not take an explicit rounding mode.
  kDefault = kDYN,

  // Some instructions never need to round even though the spec includes the RM field.
  // To simplify testing, emit the RM as 0 by default for these instructions because that's what
  // `clang` does and because the `llvm-objdump` fails to disassemble the other rounding modes.
  kIgnored = 0b000
};
96 
// Acquire/release ordering bits, passed as the `aqrl` argument of the LR/SC and AMO emitters.
enum class AqRl : uint32_t {
  kNone = 0b00,
  kRelease = 0b01,
  kAcquire = 0b10,
  // Both acquire and release.
  kAqRl = kAcquire | kRelease
};
103 
// Bit flags for the `pred` and `succ` arguments of `Fence()`.
// Flags can be combined; `kFenceDefault` has all four bits set (0xf).
enum FenceType {
  kFenceNone = 0,
  kFenceWrite = 1 << 0,
  kFenceRead = 1 << 1,
  kFenceOutput = 1 << 2,
  kFenceInput = 1 << 3,
  kFenceDefault = kFenceWrite | kFenceRead | kFenceOutput | kFenceInput,
};
113 
// Used to test the values returned by FClassS/FClassD.
// Each enumerator is a distinct single bit, so several classes can be tested with one mask.
enum FPClassMaskType {
  kNegativeInfinity  = 1 << 0,
  kNegativeNormal    = 1 << 1,
  kNegativeSubnormal = 1 << 2,
  kNegativeZero      = 1 << 3,
  kPositiveZero      = 1 << 4,
  kPositiveSubnormal = 1 << 5,
  kPositiveNormal    = 1 << 6,
  kPositiveInfinity  = 1 << 7,
  kSignalingNaN      = 1 << 8,
  kQuietNaN          = 1 << 9,
};
127 
// Addresses of the vector-related control and status registers (CSRs).
// The "URW"/"URO" tags are carried over from the original per-register notes.
enum class CSRAddress : uint32_t {
  // URW registers.
  kVstart = 0x008,  // Vector start position
  kVxsat = 0x009,   // Fixed-Point Saturate Flag
  kVxrm = 0x00A,    // Fixed-Point Rounding Mode

  // Reserved for future vector CSRs.
  kReserved1 = 0x00B,
  kReserved2 = 0x00C,
  kReserved3 = 0x00D,
  kReserved4 = 0x00E,

  // URW register.
  kVcsr = 0x00F,  // Vector control and status register

  // URO registers.
  kVl = 0xC20,     // Vector length
  kVtype = 0xC21,  // Vector data type register
  kVlenb = 0xC22,  // VLEN/8 (vector register length in bytes)
};
141 
142 class Riscv64Label : public Label {
143  public:
Riscv64Label()144   Riscv64Label() : prev_branch_id_(kNoPrevBranchId) {}
145 
Riscv64Label(Riscv64Label && src)146   Riscv64Label(Riscv64Label&& src) noexcept
147       // NOLINTNEXTLINE - src.prev_branch_id_ is valid after the move
148       : Label(std::move(src)), prev_branch_id_(src.prev_branch_id_) {}
149 
150  private:
151   static constexpr uint32_t kNoPrevBranchId = std::numeric_limits<uint32_t>::max();
152 
153   uint32_t prev_branch_id_;  // To get distance from preceding branch, if any.
154 
155   friend class Riscv64Assembler;
156   DISALLOW_COPY_AND_ASSIGN(Riscv64Label);
157 };
158 
159 // Assembler literal is a value embedded in code, retrieved using a PC-relative load.
160 class Literal {
161  public:
162   static constexpr size_t kMaxSize = 8;
163 
Literal(uint32_t size,const uint8_t * data)164   Literal(uint32_t size, const uint8_t* data) : label_(), size_(size) {
165     DCHECK_LE(size, Literal::kMaxSize);
166     memcpy(data_, data, size);
167   }
168 
169   template <typename T>
GetValue()170   T GetValue() const {
171     DCHECK_EQ(size_, sizeof(T));
172     T value;
173     memcpy(&value, data_, sizeof(T));
174     return value;
175   }
176 
GetSize()177   uint32_t GetSize() const { return size_; }
178 
GetData()179   const uint8_t* GetData() const { return data_; }
180 
GetLabel()181   Riscv64Label* GetLabel() { return &label_; }
182 
GetLabel()183   const Riscv64Label* GetLabel() const { return &label_; }
184 
185  private:
186   Riscv64Label label_;
187   const uint32_t size_;
188   uint8_t data_[kMaxSize];
189 
190   DISALLOW_COPY_AND_ASSIGN(Literal);
191 };
192 
193 // Jump table: table of labels emitted after the code and before the literals. Similar to literals.
194 class JumpTable {
195  public:
JumpTable(ArenaVector<Riscv64Label * > && labels)196   explicit JumpTable(ArenaVector<Riscv64Label*>&& labels) : label_(), labels_(std::move(labels)) {}
197 
GetSize()198   size_t GetSize() const { return labels_.size() * sizeof(int32_t); }
199 
GetData()200   const ArenaVector<Riscv64Label*>& GetData() const { return labels_; }
201 
GetLabel()202   Riscv64Label* GetLabel() { return &label_; }
203 
GetLabel()204   const Riscv64Label* GetLabel() const { return &label_; }
205 
206  private:
207   Riscv64Label label_;
208   ArenaVector<Riscv64Label*> labels_;
209 
210   DISALLOW_COPY_AND_ASSIGN(JumpTable);
211 };
212 
213 class Riscv64Assembler final : public Assembler {
214  public:
215   explicit Riscv64Assembler(ArenaAllocator* allocator,
216                             const Riscv64InstructionSetFeatures* instruction_set_features = nullptr)
217       : Riscv64Assembler(allocator,
218                          instruction_set_features != nullptr
219                              ? ConvertExtensions(instruction_set_features)
220                              : kRiscv64AllExtensionsMask) {}
221 
Riscv64Assembler(ArenaAllocator * allocator,Riscv64ExtensionMask enabled_extensions)222   Riscv64Assembler(ArenaAllocator* allocator, Riscv64ExtensionMask enabled_extensions)
223       : Assembler(allocator),
224         branches_(allocator->Adapter(kArenaAllocAssembler)),
225         finalized_(false),
226         overwriting_(false),
227         overwrite_location_(0),
228         literals_(allocator->Adapter(kArenaAllocAssembler)),
229         long_literals_(allocator->Adapter(kArenaAllocAssembler)),
230         jump_tables_(allocator->Adapter(kArenaAllocAssembler)),
231         last_position_adjustment_(0),
232         last_old_position_(0),
233         last_branch_id_(0),
234         enabled_extensions_(enabled_extensions),
235         available_scratch_core_registers_((1u << TMP) | (1u << TMP2)),
236         available_scratch_fp_registers_(1u << FTMP) {
237     cfi().DelayEmittingAdvancePCs();
238   }
239 
~Riscv64Assembler()240   virtual ~Riscv64Assembler() {
241     for (auto& branch : branches_) {
242       CHECK(branch.IsResolved());
243     }
244   }
245 
CodeSize()246   size_t CodeSize() const override { return Assembler::CodeSize(); }
cfi()247   DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); }
248 
IsExtensionEnabled(Riscv64Extension ext)249   bool IsExtensionEnabled(Riscv64Extension ext) const {
250     return (enabled_extensions_ & Riscv64ExtensionBit(ext)) != 0u;
251   }
252 
253   // According to "The RISC-V Instruction Set Manual"
254 
255   // LUI/AUIPC (RV32I, with sign-extension on RV64I), opcode = 0x17, 0x37
256   // Note: These take a 20-bit unsigned value to align with the clang assembler for testing,
257   // but the value stored in the register shall actually be sign-extended to 64 bits.
258   void Lui(XRegister rd, uint32_t imm20);
259   void Auipc(XRegister rd, uint32_t imm20);
260 
261   // Jump instructions (RV32I), opcode = 0x67, 0x6f
262   void Jal(XRegister rd, int32_t offset);
263   void Jalr(XRegister rd, XRegister rs1, int32_t offset);
264 
265   // Branch instructions (RV32I), opcode = 0x63, funct3 from 0x0 ~ 0x1 and 0x4 ~ 0x7
266   void Beq(XRegister rs1, XRegister rs2, int32_t offset);
267   void Bne(XRegister rs1, XRegister rs2, int32_t offset);
268   void Blt(XRegister rs1, XRegister rs2, int32_t offset);
269   void Bge(XRegister rs1, XRegister rs2, int32_t offset);
270   void Bltu(XRegister rs1, XRegister rs2, int32_t offset);
271   void Bgeu(XRegister rs1, XRegister rs2, int32_t offset);
272 
273   // Load instructions (RV32I+RV64I): opcode = 0x03, funct3 from 0x0 ~ 0x6
274   void Lb(XRegister rd, XRegister rs1, int32_t offset);
275   void Lh(XRegister rd, XRegister rs1, int32_t offset);
276   void Lw(XRegister rd, XRegister rs1, int32_t offset);
277   void Ld(XRegister rd, XRegister rs1, int32_t offset);
278   void Lbu(XRegister rd, XRegister rs1, int32_t offset);
279   void Lhu(XRegister rd, XRegister rs1, int32_t offset);
280   void Lwu(XRegister rd, XRegister rs1, int32_t offset);
281 
282   // Store instructions (RV32I+RV64I): opcode = 0x23, funct3 from 0x0 ~ 0x3
283   void Sb(XRegister rs2, XRegister rs1, int32_t offset);
284   void Sh(XRegister rs2, XRegister rs1, int32_t offset);
285   void Sw(XRegister rs2, XRegister rs1, int32_t offset);
286   void Sd(XRegister rs2, XRegister rs1, int32_t offset);
287 
288   // IMM ALU instructions (RV32I): opcode = 0x13, funct3 from 0x0 ~ 0x7
289   void Addi(XRegister rd, XRegister rs1, int32_t imm12);
290   void Slti(XRegister rd, XRegister rs1, int32_t imm12);
291   void Sltiu(XRegister rd, XRegister rs1, int32_t imm12);
292   void Xori(XRegister rd, XRegister rs1, int32_t imm12);
293   void Ori(XRegister rd, XRegister rs1, int32_t imm12);
294   void Andi(XRegister rd, XRegister rs1, int32_t imm12);
295   void Slli(XRegister rd, XRegister rs1, int32_t shamt);
296   void Srli(XRegister rd, XRegister rs1, int32_t shamt);
297   void Srai(XRegister rd, XRegister rs1, int32_t shamt);
298 
299   // ALU instructions (RV32I): opcode = 0x33, funct3 from 0x0 ~ 0x7
300   void Add(XRegister rd, XRegister rs1, XRegister rs2);
301   void Sub(XRegister rd, XRegister rs1, XRegister rs2);
302   void Slt(XRegister rd, XRegister rs1, XRegister rs2);
303   void Sltu(XRegister rd, XRegister rs1, XRegister rs2);
304   void Xor(XRegister rd, XRegister rs1, XRegister rs2);
305   void Or(XRegister rd, XRegister rs1, XRegister rs2);
306   void And(XRegister rd, XRegister rs1, XRegister rs2);
307   void Sll(XRegister rd, XRegister rs1, XRegister rs2);
308   void Srl(XRegister rd, XRegister rs1, XRegister rs2);
309   void Sra(XRegister rd, XRegister rs1, XRegister rs2);
310 
311   // 32bit Imm ALU instructions (RV64I): opcode = 0x1b, funct3 from 0x0, 0x1, 0x5
312   void Addiw(XRegister rd, XRegister rs1, int32_t imm12);
313   void Slliw(XRegister rd, XRegister rs1, int32_t shamt);
314   void Srliw(XRegister rd, XRegister rs1, int32_t shamt);
315   void Sraiw(XRegister rd, XRegister rs1, int32_t shamt);
316 
317   // 32bit ALU instructions (RV64I): opcode = 0x3b, funct3 from 0x0 ~ 0x7
318   void Addw(XRegister rd, XRegister rs1, XRegister rs2);
319   void Subw(XRegister rd, XRegister rs1, XRegister rs2);
320   void Sllw(XRegister rd, XRegister rs1, XRegister rs2);
321   void Srlw(XRegister rd, XRegister rs1, XRegister rs2);
322   void Sraw(XRegister rd, XRegister rs1, XRegister rs2);
323 
324   // Environment call and breakpoint (RV32I), opcode = 0x73
325   void Ecall();
326   void Ebreak();
327 
328   // Fence instruction (RV32I): opcode = 0xf, funct3 = 0
329   void Fence(uint32_t pred = kFenceDefault, uint32_t succ = kFenceDefault);
330   void FenceTso();
331 
332   // "Zifencei" Standard Extension, opcode = 0xf, funct3 = 1
333   void FenceI();
334 
335   // RV32M Standard Extension: opcode = 0x33, funct3 from 0x0 ~ 0x7
336   void Mul(XRegister rd, XRegister rs1, XRegister rs2);
337   void Mulh(XRegister rd, XRegister rs1, XRegister rs2);
338   void Mulhsu(XRegister rd, XRegister rs1, XRegister rs2);
339   void Mulhu(XRegister rd, XRegister rs1, XRegister rs2);
340   void Div(XRegister rd, XRegister rs1, XRegister rs2);
341   void Divu(XRegister rd, XRegister rs1, XRegister rs2);
342   void Rem(XRegister rd, XRegister rs1, XRegister rs2);
343   void Remu(XRegister rd, XRegister rs1, XRegister rs2);
344 
345   // RV64M Standard Extension: opcode = 0x3b, funct3 0x0 and from 0x4 ~ 0x7
346   void Mulw(XRegister rd, XRegister rs1, XRegister rs2);
347   void Divw(XRegister rd, XRegister rs1, XRegister rs2);
348   void Divuw(XRegister rd, XRegister rs1, XRegister rs2);
349   void Remw(XRegister rd, XRegister rs1, XRegister rs2);
350   void Remuw(XRegister rd, XRegister rs1, XRegister rs2);
351 
352   // RV32A/RV64A Standard Extension
353   void LrW(XRegister rd, XRegister rs1, AqRl aqrl);
354   void LrD(XRegister rd, XRegister rs1, AqRl aqrl);
355   void ScW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
356   void ScD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
357   void AmoSwapW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
358   void AmoSwapD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
359   void AmoAddW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
360   void AmoAddD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
361   void AmoXorW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
362   void AmoXorD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
363   void AmoAndW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
364   void AmoAndD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
365   void AmoOrW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
366   void AmoOrD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
367   void AmoMinW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
368   void AmoMinD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
369   void AmoMaxW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
370   void AmoMaxD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
371   void AmoMinuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
372   void AmoMinuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
373   void AmoMaxuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
374   void AmoMaxuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
375 
376   // "Zicsr" Standard Extension, opcode = 0x73, funct3 from 0x1 ~ 0x3 and 0x5 ~ 0x7
377   void Csrrw(XRegister rd, uint32_t csr, XRegister rs1);
378   void Csrrs(XRegister rd, uint32_t csr, XRegister rs1);
379   void Csrrc(XRegister rd, uint32_t csr, XRegister rs1);
380   void Csrrwi(XRegister rd, uint32_t csr, uint32_t uimm5);
381   void Csrrsi(XRegister rd, uint32_t csr, uint32_t uimm5);
382   void Csrrci(XRegister rd, uint32_t csr, uint32_t uimm5);
383 
384   // FP load/store instructions (RV32F+RV32D): opcode = 0x07, 0x27
385   void FLw(FRegister rd, XRegister rs1, int32_t offset);
386   void FLd(FRegister rd, XRegister rs1, int32_t offset);
387   void FSw(FRegister rs2, XRegister rs1, int32_t offset);
388   void FSd(FRegister rs2, XRegister rs1, int32_t offset);
389 
390   // FP FMA instructions (RV32F+RV32D): opcode = 0x43, 0x47, 0x4b, 0x4f
391   void FMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
392   void FMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
393   void FMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
394   void FMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
395   void FNMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
396   void FNMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
397   void FNMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
398   void FNMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
399 
400   // FP FMA instruction helpers passing the default rounding mode.
FMAddS(FRegister rd,FRegister rs1,FRegister rs2,FRegister rs3)401   void FMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
402     FMAddS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
403   }
FMAddD(FRegister rd,FRegister rs1,FRegister rs2,FRegister rs3)404   void FMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
405     FMAddD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
406   }
FMSubS(FRegister rd,FRegister rs1,FRegister rs2,FRegister rs3)407   void FMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
408     FMSubS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
409   }
FMSubD(FRegister rd,FRegister rs1,FRegister rs2,FRegister rs3)410   void FMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
411     FMSubD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
412   }
FNMSubS(FRegister rd,FRegister rs1,FRegister rs2,FRegister rs3)413   void FNMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
414     FNMSubS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
415   }
FNMSubD(FRegister rd,FRegister rs1,FRegister rs2,FRegister rs3)416   void FNMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
417     FNMSubD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
418   }
FNMAddS(FRegister rd,FRegister rs1,FRegister rs2,FRegister rs3)419   void FNMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
420     FNMAddS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
421   }
FNMAddD(FRegister rd,FRegister rs1,FRegister rs2,FRegister rs3)422   void FNMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
423     FNMAddD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
424   }
425 
426   // Simple FP instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b0XXXX0D
427   void FAddS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
428   void FAddD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
429   void FSubS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
430   void FSubD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
431   void FMulS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
432   void FMulD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
433   void FDivS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
434   void FDivD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
435   void FSqrtS(FRegister rd, FRegister rs1, FPRoundingMode frm);
436   void FSqrtD(FRegister rd, FRegister rs1, FPRoundingMode frm);
437   void FSgnjS(FRegister rd, FRegister rs1, FRegister rs2);
438   void FSgnjD(FRegister rd, FRegister rs1, FRegister rs2);
439   void FSgnjnS(FRegister rd, FRegister rs1, FRegister rs2);
440   void FSgnjnD(FRegister rd, FRegister rs1, FRegister rs2);
441   void FSgnjxS(FRegister rd, FRegister rs1, FRegister rs2);
442   void FSgnjxD(FRegister rd, FRegister rs1, FRegister rs2);
443   void FMinS(FRegister rd, FRegister rs1, FRegister rs2);
444   void FMinD(FRegister rd, FRegister rs1, FRegister rs2);
445   void FMaxS(FRegister rd, FRegister rs1, FRegister rs2);
446   void FMaxD(FRegister rd, FRegister rs1, FRegister rs2);
447   void FCvtSD(FRegister rd, FRegister rs1, FPRoundingMode frm);
448   void FCvtDS(FRegister rd, FRegister rs1, FPRoundingMode frm);
449 
450   // Simple FP instruction helpers passing the default rounding mode.
FAddS(FRegister rd,FRegister rs1,FRegister rs2)451   void FAddS(FRegister rd, FRegister rs1, FRegister rs2) {
452     FAddS(rd, rs1, rs2, FPRoundingMode::kDefault);
453   }
FAddD(FRegister rd,FRegister rs1,FRegister rs2)454   void FAddD(FRegister rd, FRegister rs1, FRegister rs2) {
455     FAddD(rd, rs1, rs2, FPRoundingMode::kDefault);
456   }
FSubS(FRegister rd,FRegister rs1,FRegister rs2)457   void FSubS(FRegister rd, FRegister rs1, FRegister rs2) {
458     FSubS(rd, rs1, rs2, FPRoundingMode::kDefault);
459   }
FSubD(FRegister rd,FRegister rs1,FRegister rs2)460   void FSubD(FRegister rd, FRegister rs1, FRegister rs2) {
461     FSubD(rd, rs1, rs2, FPRoundingMode::kDefault);
462   }
FMulS(FRegister rd,FRegister rs1,FRegister rs2)463   void FMulS(FRegister rd, FRegister rs1, FRegister rs2) {
464     FMulS(rd, rs1, rs2, FPRoundingMode::kDefault);
465   }
FMulD(FRegister rd,FRegister rs1,FRegister rs2)466   void FMulD(FRegister rd, FRegister rs1, FRegister rs2) {
467     FMulD(rd, rs1, rs2, FPRoundingMode::kDefault);
468   }
FDivS(FRegister rd,FRegister rs1,FRegister rs2)469   void FDivS(FRegister rd, FRegister rs1, FRegister rs2) {
470     FDivS(rd, rs1, rs2, FPRoundingMode::kDefault);
471   }
FDivD(FRegister rd,FRegister rs1,FRegister rs2)472   void FDivD(FRegister rd, FRegister rs1, FRegister rs2) {
473     FDivD(rd, rs1, rs2, FPRoundingMode::kDefault);
474   }
FSqrtS(FRegister rd,FRegister rs1)475   void FSqrtS(FRegister rd, FRegister rs1) {
476     FSqrtS(rd, rs1, FPRoundingMode::kDefault);
477   }
FSqrtD(FRegister rd,FRegister rs1)478   void FSqrtD(FRegister rd, FRegister rs1) {
479     FSqrtD(rd, rs1, FPRoundingMode::kDefault);
480   }
FCvtSD(FRegister rd,FRegister rs1)481   void FCvtSD(FRegister rd, FRegister rs1) {
482     FCvtSD(rd, rs1, FPRoundingMode::kDefault);
483   }
FCvtDS(FRegister rd,FRegister rs1)484   void FCvtDS(FRegister rd, FRegister rs1) {
485     FCvtDS(rd, rs1, FPRoundingMode::kIgnored);
486   }
487 
488   // FP compare instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b101000D
489   void FEqS(XRegister rd, FRegister rs1, FRegister rs2);
490   void FEqD(XRegister rd, FRegister rs1, FRegister rs2);
491   void FLtS(XRegister rd, FRegister rs1, FRegister rs2);
492   void FLtD(XRegister rd, FRegister rs1, FRegister rs2);
493   void FLeS(XRegister rd, FRegister rs1, FRegister rs2);
494   void FLeD(XRegister rd, FRegister rs1, FRegister rs2);
495 
496   // FP conversion instructions (RV32F+RV32D+RV64F+RV64D): opcode = 0x53, funct7 = 0b110X00D
497   void FCvtWS(XRegister rd, FRegister rs1, FPRoundingMode frm);
498   void FCvtWD(XRegister rd, FRegister rs1, FPRoundingMode frm);
499   void FCvtWuS(XRegister rd, FRegister rs1, FPRoundingMode frm);
500   void FCvtWuD(XRegister rd, FRegister rs1, FPRoundingMode frm);
501   void FCvtLS(XRegister rd, FRegister rs1, FPRoundingMode frm);
502   void FCvtLD(XRegister rd, FRegister rs1, FPRoundingMode frm);
503   void FCvtLuS(XRegister rd, FRegister rs1, FPRoundingMode frm);
504   void FCvtLuD(XRegister rd, FRegister rs1, FPRoundingMode frm);
505   void FCvtSW(FRegister rd, XRegister rs1, FPRoundingMode frm);
506   void FCvtDW(FRegister rd, XRegister rs1, FPRoundingMode frm);
507   void FCvtSWu(FRegister rd, XRegister rs1, FPRoundingMode frm);
508   void FCvtDWu(FRegister rd, XRegister rs1, FPRoundingMode frm);
509   void FCvtSL(FRegister rd, XRegister rs1, FPRoundingMode frm);
510   void FCvtDL(FRegister rd, XRegister rs1, FPRoundingMode frm);
511   void FCvtSLu(FRegister rd, XRegister rs1, FPRoundingMode frm);
512   void FCvtDLu(FRegister rd, XRegister rs1, FPRoundingMode frm);
513 
514   // FP conversion instruction helpers passing the default rounding mode.
FCvtWS(XRegister rd,FRegister rs1)515   void FCvtWS(XRegister rd, FRegister rs1) { FCvtWS(rd, rs1, FPRoundingMode::kDefault); }
FCvtWD(XRegister rd,FRegister rs1)516   void FCvtWD(XRegister rd, FRegister rs1) { FCvtWD(rd, rs1, FPRoundingMode::kDefault); }
FCvtWuS(XRegister rd,FRegister rs1)517   void FCvtWuS(XRegister rd, FRegister rs1) { FCvtWuS(rd, rs1, FPRoundingMode::kDefault); }
FCvtWuD(XRegister rd,FRegister rs1)518   void FCvtWuD(XRegister rd, FRegister rs1) { FCvtWuD(rd, rs1, FPRoundingMode::kDefault); }
FCvtLS(XRegister rd,FRegister rs1)519   void FCvtLS(XRegister rd, FRegister rs1) { FCvtLS(rd, rs1, FPRoundingMode::kDefault); }
FCvtLD(XRegister rd,FRegister rs1)520   void FCvtLD(XRegister rd, FRegister rs1) { FCvtLD(rd, rs1, FPRoundingMode::kDefault); }
FCvtLuS(XRegister rd,FRegister rs1)521   void FCvtLuS(XRegister rd, FRegister rs1) { FCvtLuS(rd, rs1, FPRoundingMode::kDefault); }
FCvtLuD(XRegister rd,FRegister rs1)522   void FCvtLuD(XRegister rd, FRegister rs1) { FCvtLuD(rd, rs1, FPRoundingMode::kDefault); }
FCvtSW(FRegister rd,XRegister rs1)523   void FCvtSW(FRegister rd, XRegister rs1) { FCvtSW(rd, rs1, FPRoundingMode::kDefault); }
FCvtDW(FRegister rd,XRegister rs1)524   void FCvtDW(FRegister rd, XRegister rs1) { FCvtDW(rd, rs1, FPRoundingMode::kIgnored); }
FCvtSWu(FRegister rd,XRegister rs1)525   void FCvtSWu(FRegister rd, XRegister rs1) { FCvtSWu(rd, rs1, FPRoundingMode::kDefault); }
FCvtDWu(FRegister rd,XRegister rs1)526   void FCvtDWu(FRegister rd, XRegister rs1) { FCvtDWu(rd, rs1, FPRoundingMode::kIgnored); }
FCvtSL(FRegister rd,XRegister rs1)527   void FCvtSL(FRegister rd, XRegister rs1) { FCvtSL(rd, rs1, FPRoundingMode::kDefault); }
FCvtDL(FRegister rd,XRegister rs1)528   void FCvtDL(FRegister rd, XRegister rs1) { FCvtDL(rd, rs1, FPRoundingMode::kDefault); }
FCvtSLu(FRegister rd,XRegister rs1)529   void FCvtSLu(FRegister rd, XRegister rs1) { FCvtSLu(rd, rs1, FPRoundingMode::kDefault); }
FCvtDLu(FRegister rd,XRegister rs1)530   void FCvtDLu(FRegister rd, XRegister rs1) { FCvtDLu(rd, rs1, FPRoundingMode::kDefault); }
531 
532   // FP move instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x0, funct7 = 0b111X00D
533   void FMvXW(XRegister rd, FRegister rs1);
534   void FMvXD(XRegister rd, FRegister rs1);
535   void FMvWX(FRegister rd, XRegister rs1);
536   void FMvDX(FRegister rd, XRegister rs1);
537 
538   // FP classify instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x1, funct7 = 0b111X00D
539   void FClassS(XRegister rd, FRegister rs1);
540   void FClassD(XRegister rd, FRegister rs1);
541 
  // "C" Standard Extension, Compressed Instructions
543   void CLwsp(XRegister rd, int32_t offset);
544   void CLdsp(XRegister rd, int32_t offset);
545   void CFLdsp(FRegister rd, int32_t offset);
546   void CSwsp(XRegister rs2, int32_t offset);
547   void CSdsp(XRegister rs2, int32_t offset);
548   void CFSdsp(FRegister rs2, int32_t offset);
549 
550   void CLw(XRegister rd_s, XRegister rs1_s, int32_t offset);
551   void CLd(XRegister rd_s, XRegister rs1_s, int32_t offset);
552   void CFLd(FRegister rd_s, XRegister rs1_s, int32_t offset);
553   void CSw(XRegister rs2_s, XRegister rs1_s, int32_t offset);
554   void CSd(XRegister rs2_s, XRegister rs1_s, int32_t offset);
555   void CFSd(FRegister rs2_s, XRegister rs1_s, int32_t offset);
556 
557   void CLi(XRegister rd, int32_t imm);
558   void CLui(XRegister rd, uint32_t nzimm6);
559   void CAddi(XRegister rd, int32_t nzimm);
560   void CAddiw(XRegister rd, int32_t imm);
561   void CAddi16Sp(int32_t nzimm);
562   void CAddi4Spn(XRegister rd_s, uint32_t nzuimm);
563   void CSlli(XRegister rd, int32_t shamt);
564   void CSrli(XRegister rd_s, int32_t shamt);
565   void CSrai(XRegister rd_s, int32_t shamt);
566   void CAndi(XRegister rd_s, int32_t imm);
567   void CMv(XRegister rd, XRegister rs2);
568   void CAdd(XRegister rd, XRegister rs2);
569   void CAnd(XRegister rd_s, XRegister rs2_s);
570   void COr(XRegister rd_s, XRegister rs2_s);
571   void CXor(XRegister rd_s, XRegister rs2_s);
572   void CSub(XRegister rd_s, XRegister rs2_s);
573   void CAddw(XRegister rd_s, XRegister rs2_s);
574   void CSubw(XRegister rd_s, XRegister rs2_s);
575 
576   // "Zcb" Standard Extension, part of "C", opcode = 0b00, 0b01, funct3 = 0b100.
577   void CLbu(XRegister rd_s, XRegister rs1_s, int32_t offset);
578   void CLhu(XRegister rd_s, XRegister rs1_s, int32_t offset);
579   void CLh(XRegister rd_s, XRegister rs1_s, int32_t offset);
580   void CSb(XRegister rd_s, XRegister rs1_s, int32_t offset);
581   void CSh(XRegister rd_s, XRegister rs1_s, int32_t offset);
582   void CZextB(XRegister rd_rs1_s);
583   void CSextB(XRegister rd_rs1_s);
584   void CZextH(XRegister rd_rs1_s);
585   void CSextH(XRegister rd_rs1_s);
586   void CZextW(XRegister rd_rs1_s);
587   void CNot(XRegister rd_rs1_s);
588   void CMul(XRegister rd_s, XRegister rs2_s);
589   // "Zcb" Standard Extension End; resume "C" Standard Extension.
590   // TODO(riscv64): Reorder "Zcb" after remaining "C" instructions.
591 
592   void CJ(int32_t offset);
593   void CJr(XRegister rs1);
594   void CJalr(XRegister rs1);
595   void CBeqz(XRegister rs1_s, int32_t offset);
596   void CBnez(XRegister rs1_s, int32_t offset);
597 
598   void CEbreak();
599   void CNop();
600   void CUnimp();
601 
602   // "Zba" Standard Extension, opcode = 0x1b, 0x33 or 0x3b, funct3 and funct7 varies.
603   void AddUw(XRegister rd, XRegister rs1, XRegister rs2);
604   void Sh1Add(XRegister rd, XRegister rs1, XRegister rs2);
605   void Sh1AddUw(XRegister rd, XRegister rs1, XRegister rs2);
606   void Sh2Add(XRegister rd, XRegister rs1, XRegister rs2);
607   void Sh2AddUw(XRegister rd, XRegister rs1, XRegister rs2);
608   void Sh3Add(XRegister rd, XRegister rs1, XRegister rs2);
609   void Sh3AddUw(XRegister rd, XRegister rs1, XRegister rs2);
610   void SlliUw(XRegister rd, XRegister rs1, int32_t shamt);
611 
612   // "Zbb" Standard Extension, opcode = 0x13, 0x1b, 0x33 or 0x3b, funct3 and funct7 varies.
613   // Note: 32-bit sext.b, sext.h and zext.h from the Zbb extension are explicitly
614   // prefixed with "Zbb" to differentiate them from the utility macros.
615   void Andn(XRegister rd, XRegister rs1, XRegister rs2);
616   void Orn(XRegister rd, XRegister rs1, XRegister rs2);
617   void Xnor(XRegister rd, XRegister rs1, XRegister rs2);
618   void Clz(XRegister rd, XRegister rs1);
619   void Clzw(XRegister rd, XRegister rs1);
620   void Ctz(XRegister rd, XRegister rs1);
621   void Ctzw(XRegister rd, XRegister rs1);
622   void Cpop(XRegister rd, XRegister rs1);
623   void Cpopw(XRegister rd, XRegister rs1);
624   void Min(XRegister rd, XRegister rs1, XRegister rs2);
625   void Minu(XRegister rd, XRegister rs1, XRegister rs2);
626   void Max(XRegister rd, XRegister rs1, XRegister rs2);
627   void Maxu(XRegister rd, XRegister rs1, XRegister rs2);
628   void Rol(XRegister rd, XRegister rs1, XRegister rs2);
629   void Rolw(XRegister rd, XRegister rs1, XRegister rs2);
630   void Ror(XRegister rd, XRegister rs1, XRegister rs2);
631   void Rorw(XRegister rd, XRegister rs1, XRegister rs2);
632   void Rori(XRegister rd, XRegister rs1, int32_t shamt);
633   void Roriw(XRegister rd, XRegister rs1, int32_t shamt);
634   void OrcB(XRegister rd, XRegister rs1);
635   void Rev8(XRegister rd, XRegister rs1);
636   void ZbbSextB(XRegister rd, XRegister rs1);
637   void ZbbSextH(XRegister rd, XRegister rs1);
638   void ZbbZextH(XRegister rd, XRegister rs1);
639 
640   ////////////////////////////// RISC-V Vector Instructions  START ///////////////////////////////
641   enum class LengthMultiplier : uint32_t {
642     kM1Over8 = 0b101,
643     kM1Over4 = 0b110,
644     kM1Over2 = 0b111,
645     kM1 = 0b000,
646     kM2 = 0b001,
647     kM4 = 0b010,
648     kM8 = 0b011,
649 
650     kReserved1 = 0b100,
651   };
652 
  // Encodings accepted as the `sew` argument of `VTypeiValue()`, which shifts them left by 3
  // in the value it returns. The `kE<N>` names presumably denote N-bit elements; the remaining
  // four 3-bit encodings are named only as reserved placeholders.
  enum class SelectedElementWidth : uint32_t {
    kE8 = 0b000,
    kE16 = 0b001,
    kE32 = 0b010,
    kE64 = 0b011,

    kReserved1 = 0b100,
    kReserved2 = 0b101,
    kReserved3 = 0b110,
    kReserved4 = 0b111,
  };
664 
  // Value of the `vma` argument of `VTypeiValue()`, which places it at bit 7 of its result.
  enum class VectorMaskAgnostic : uint32_t {
    kUndisturbed = 0,
    kAgnostic = 1,
  };
669 
  // Value of the `vta` argument of `VTypeiValue()`, which places it at bit 6 of its result.
  enum class VectorTailAgnostic : uint32_t {
    kUndisturbed = 0,
    kAgnostic = 1,
  };
674 
675   enum class VM : uint32_t {  // Vector mask
676     kV0_t = 0b0,
677     kUnmasked = 0b1
678   };
679 
  // Vector Configuration-Setting Instructions, opcode = 0x57, funct3 = 0x3
681   void VSetvli(XRegister rd, XRegister rs1, uint32_t vtypei);
682   void VSetivli(XRegister rd, uint32_t uimm, uint32_t vtypei);
683   void VSetvl(XRegister rd, XRegister rs1, XRegister rs2);
684 
VTypeiValue(VectorMaskAgnostic vma,VectorTailAgnostic vta,SelectedElementWidth sew,LengthMultiplier lmul)685   static uint32_t VTypeiValue(VectorMaskAgnostic vma,
686                               VectorTailAgnostic vta,
687                               SelectedElementWidth sew,
688                               LengthMultiplier lmul) {
689     return static_cast<uint32_t>(vma) << 7 | static_cast<uint32_t>(vta) << 6 |
690            static_cast<uint32_t>(sew) << 3 | static_cast<uint32_t>(lmul);
691   }
692 
693   // Vector Unit-Stride Load/Store Instructions
694   void VLe8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
695   void VLe16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
696   void VLe32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
697   void VLe64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
698   void VLm(VRegister vd, XRegister rs1);
699 
700   void VSe8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
701   void VSe16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
702   void VSe32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
703   void VSe64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
704   void VSm(VRegister vs3, XRegister rs1);
705 
706   // Vector unit-stride fault-only-first Instructions
707   void VLe8ff(VRegister vd, XRegister rs1);
708   void VLe16ff(VRegister vd, XRegister rs1);
709   void VLe32ff(VRegister vd, XRegister rs1);
710   void VLe64ff(VRegister vd, XRegister rs1);
711 
712   // Vector Strided Load/Store Instructions
713   void VLse8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
714   void VLse16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
715   void VLse32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
716   void VLse64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
717 
718   void VSse8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
719   void VSse16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
720   void VSse32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
721   void VSse64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
722 
723   // Vector Indexed Load/Store Instructions
724   void VLoxei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
725   void VLoxei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
726   void VLoxei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
727   void VLoxei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
728 
729   void VLuxei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
730   void VLuxei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
731   void VLuxei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
732   void VLuxei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
733 
734   void VSoxei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
735   void VSoxei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
736   void VSoxei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
737   void VSoxei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
738 
739   void VSuxei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
740   void VSuxei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
741   void VSuxei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
742   void VSuxei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
743 
744   // Vector Segment Load/Store
745 
746   // Vector Unit-Stride Segment Loads/Stores
747 
748   void VLseg2e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
749   void VLseg2e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
750   void VLseg2e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
751   void VLseg2e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
752   void VLseg3e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
753   void VLseg3e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
754   void VLseg3e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
755   void VLseg3e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
756   void VLseg4e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
757   void VLseg4e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
758   void VLseg4e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
759   void VLseg4e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
760   void VLseg5e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
761   void VLseg5e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
762   void VLseg5e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
763   void VLseg5e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
764   void VLseg6e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
765   void VLseg6e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
766   void VLseg6e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
767   void VLseg6e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
768   void VLseg7e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
769   void VLseg7e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
770   void VLseg7e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
771   void VLseg7e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
772   void VLseg8e8(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
773   void VLseg8e16(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
774   void VLseg8e32(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
775   void VLseg8e64(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
776 
777   void VSseg2e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
778   void VSseg2e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
779   void VSseg2e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
780   void VSseg2e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
781   void VSseg3e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
782   void VSseg3e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
783   void VSseg3e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
784   void VSseg3e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
785   void VSseg4e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
786   void VSseg4e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
787   void VSseg4e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
788   void VSseg4e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
789   void VSseg5e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
790   void VSseg5e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
791   void VSseg5e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
792   void VSseg5e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
793   void VSseg6e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
794   void VSseg6e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
795   void VSseg6e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
796   void VSseg6e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
797   void VSseg7e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
798   void VSseg7e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
799   void VSseg7e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
800   void VSseg7e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
801   void VSseg8e8(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
802   void VSseg8e16(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
803   void VSseg8e32(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
804   void VSseg8e64(VRegister vs3, XRegister rs1, VM vm = VM::kUnmasked);
805 
806   // Vector Unit-Stride Fault-only-First Segment Loads
807 
808   void VLseg2e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
809   void VLseg2e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
810   void VLseg2e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
811   void VLseg2e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
812   void VLseg3e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
813   void VLseg3e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
814   void VLseg3e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
815   void VLseg3e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
816   void VLseg4e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
817   void VLseg4e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
818   void VLseg4e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
819   void VLseg4e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
820   void VLseg5e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
821   void VLseg5e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
822   void VLseg5e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
823   void VLseg5e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
824   void VLseg6e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
825   void VLseg6e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
826   void VLseg6e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
827   void VLseg6e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
828   void VLseg7e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
829   void VLseg7e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
830   void VLseg7e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
831   void VLseg7e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
832   void VLseg8e8ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
833   void VLseg8e16ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
834   void VLseg8e32ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
835   void VLseg8e64ff(VRegister vd, XRegister rs1, VM vm = VM::kUnmasked);
836 
837   // Vector Strided Segment Loads/Stores
838 
839   void VLsseg2e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
840   void VLsseg2e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
841   void VLsseg2e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
842   void VLsseg2e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
843   void VLsseg3e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
844   void VLsseg3e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
845   void VLsseg3e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
846   void VLsseg3e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
847   void VLsseg4e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
848   void VLsseg4e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
849   void VLsseg4e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
850   void VLsseg4e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
851   void VLsseg5e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
852   void VLsseg5e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
853   void VLsseg5e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
854   void VLsseg5e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
855   void VLsseg6e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
856   void VLsseg6e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
857   void VLsseg6e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
858   void VLsseg6e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
859   void VLsseg7e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
860   void VLsseg7e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
861   void VLsseg7e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
862   void VLsseg7e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
863   void VLsseg8e8(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
864   void VLsseg8e16(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
865   void VLsseg8e32(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
866   void VLsseg8e64(VRegister vd, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
867 
868   void VSsseg2e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
869   void VSsseg2e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
870   void VSsseg2e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
871   void VSsseg2e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
872   void VSsseg3e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
873   void VSsseg3e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
874   void VSsseg3e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
875   void VSsseg3e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
876   void VSsseg4e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
877   void VSsseg4e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
878   void VSsseg4e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
879   void VSsseg4e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
880   void VSsseg5e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
881   void VSsseg5e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
882   void VSsseg5e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
883   void VSsseg5e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
884   void VSsseg6e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
885   void VSsseg6e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
886   void VSsseg6e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
887   void VSsseg6e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
888   void VSsseg7e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
889   void VSsseg7e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
890   void VSsseg7e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
891   void VSsseg7e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
892   void VSsseg8e8(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
893   void VSsseg8e16(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
894   void VSsseg8e32(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
895   void VSsseg8e64(VRegister vs3, XRegister rs1, XRegister rs2, VM vm = VM::kUnmasked);
896 
897   // Vector Indexed-unordered Segment Loads/Stores
898 
899   void VLuxseg2ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
900   void VLuxseg2ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
901   void VLuxseg2ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
902   void VLuxseg2ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
903   void VLuxseg3ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
904   void VLuxseg3ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
905   void VLuxseg3ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
906   void VLuxseg3ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
907   void VLuxseg4ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
908   void VLuxseg4ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
909   void VLuxseg4ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
910   void VLuxseg4ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
911   void VLuxseg5ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
912   void VLuxseg5ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
913   void VLuxseg5ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
914   void VLuxseg5ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
915   void VLuxseg6ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
916   void VLuxseg6ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
917   void VLuxseg6ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
918   void VLuxseg6ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
919   void VLuxseg7ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
920   void VLuxseg7ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
921   void VLuxseg7ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
922   void VLuxseg7ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
923   void VLuxseg8ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
924   void VLuxseg8ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
925   void VLuxseg8ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
926   void VLuxseg8ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
927 
928   void VSuxseg2ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
929   void VSuxseg2ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
930   void VSuxseg2ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
931   void VSuxseg2ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
932   void VSuxseg3ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
933   void VSuxseg3ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
934   void VSuxseg3ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
935   void VSuxseg3ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
936   void VSuxseg4ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
937   void VSuxseg4ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
938   void VSuxseg4ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
939   void VSuxseg4ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
940   void VSuxseg5ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
941   void VSuxseg5ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
942   void VSuxseg5ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
943   void VSuxseg5ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
944   void VSuxseg6ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
945   void VSuxseg6ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
946   void VSuxseg6ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
947   void VSuxseg6ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
948   void VSuxseg7ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
949   void VSuxseg7ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
950   void VSuxseg7ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
951   void VSuxseg7ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
952   void VSuxseg8ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
953   void VSuxseg8ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
954   void VSuxseg8ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
955   void VSuxseg8ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
956 
957   // Vector Indexed-ordered Segment Loads/Stores
958 
959   void VLoxseg2ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
960   void VLoxseg2ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
961   void VLoxseg2ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
962   void VLoxseg2ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
963   void VLoxseg3ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
964   void VLoxseg3ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
965   void VLoxseg3ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
966   void VLoxseg3ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
967   void VLoxseg4ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
968   void VLoxseg4ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
969   void VLoxseg4ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
970   void VLoxseg4ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
971   void VLoxseg5ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
972   void VLoxseg5ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
973   void VLoxseg5ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
974   void VLoxseg5ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
975   void VLoxseg6ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
976   void VLoxseg6ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
977   void VLoxseg6ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
978   void VLoxseg6ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
979   void VLoxseg7ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
980   void VLoxseg7ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
981   void VLoxseg7ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
982   void VLoxseg7ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
983   void VLoxseg8ei8(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
984   void VLoxseg8ei16(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
985   void VLoxseg8ei32(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
986   void VLoxseg8ei64(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
987 
988   void VSoxseg2ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
989   void VSoxseg2ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
990   void VSoxseg2ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
991   void VSoxseg2ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
992   void VSoxseg3ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
993   void VSoxseg3ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
994   void VSoxseg3ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
995   void VSoxseg3ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
996   void VSoxseg4ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
997   void VSoxseg4ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
998   void VSoxseg4ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
999   void VSoxseg4ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1000   void VSoxseg5ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1001   void VSoxseg5ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1002   void VSoxseg5ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1003   void VSoxseg5ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1004   void VSoxseg6ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1005   void VSoxseg6ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1006   void VSoxseg6ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1007   void VSoxseg6ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1008   void VSoxseg7ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1009   void VSoxseg7ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1010   void VSoxseg7ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1011   void VSoxseg7ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1012   void VSoxseg8ei8(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1013   void VSoxseg8ei16(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1014   void VSoxseg8ei32(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1015   void VSoxseg8ei64(VRegister vs3, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1016 
1017   // Vector Whole Register Load/Store Instructions
1018 
1019   void VL1re8(VRegister vd, XRegister rs1);
1020   void VL1re16(VRegister vd, XRegister rs1);
1021   void VL1re32(VRegister vd, XRegister rs1);
1022   void VL1re64(VRegister vd, XRegister rs1);
1023 
1024   void VL2re8(VRegister vd, XRegister rs1);
1025   void VL2re16(VRegister vd, XRegister rs1);
1026   void VL2re32(VRegister vd, XRegister rs1);
1027   void VL2re64(VRegister vd, XRegister rs1);
1028 
1029   void VL4re8(VRegister vd, XRegister rs1);
1030   void VL4re16(VRegister vd, XRegister rs1);
1031   void VL4re32(VRegister vd, XRegister rs1);
1032   void VL4re64(VRegister vd, XRegister rs1);
1033 
1034   void VL8re8(VRegister vd, XRegister rs1);
1035   void VL8re16(VRegister vd, XRegister rs1);
1036   void VL8re32(VRegister vd, XRegister rs1);
1037   void VL8re64(VRegister vd, XRegister rs1);
1038 
1039   void VL1r(VRegister vd, XRegister rs1);  // Pseudoinstruction equal to VL1re8
1040   void VL2r(VRegister vd, XRegister rs1);  // Pseudoinstruction equal to VL2re8
1041   void VL4r(VRegister vd, XRegister rs1);  // Pseudoinstruction equal to VL4re8
1042   void VL8r(VRegister vd, XRegister rs1);  // Pseudoinstruction equal to VL8re8
1043 
  void VS1r(VRegister vs3, XRegister rs1);  // Store {vs3} to address in rs1
  void VS2r(VRegister vs3, XRegister rs1);  // Store {vs3}-{vs3 + 1} to address in rs1
  void VS4r(VRegister vs3, XRegister rs1);  // Store {vs3}-{vs3 + 3} to address in rs1
  void VS8r(VRegister vs3, XRegister rs1);  // Store {vs3}-{vs3 + 7} to address in rs1
1048 
1049   // Vector Arithmetic Instruction
1050 
1051   // Vector vadd instructions, funct6 = 0b000000
1052   void VAdd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1053   void VAdd_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1054   void VAdd_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1055 
1056   // Vector vsub instructions, funct6 = 0b000010
1057   void VSub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1058   void VSub_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1059 
1060   // Vector vrsub instructions, funct6 = 0b000011
1061   void VRsub_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1062   void VRsub_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);
1063 
1064   // Pseudo-instruction over VRsub_vi
1065   void VNeg_v(VRegister vd, VRegister vs2);
1066 
1067   // Vector vminu instructions, funct6 = 0b000100
1068   void VMinu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1069   void VMinu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1070 
1071   // Vector vmin instructions, funct6 = 0b000101
1072   void VMin_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1073   void VMin_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1074 
1075   // Vector vmaxu instructions, funct6 = 0b000110
1076   void VMaxu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1077   void VMaxu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1078 
1079   // Vector vmax instructions, funct6 = 0b000111
1080   void VMax_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1081   void VMax_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1082 
  // Vector vand instructions, funct6 = 0b001001
  // Each of vand/vor/vxor is declared in `_vv`, `_vx` and `_vi` forms. The immediate is a
  // signed `int32_t imm5`; `vm` defaults to `VM::kUnmasked`.
  void VAnd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VAnd_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VAnd_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Vector vor instructions, funct6 = 0b001010
  void VOr_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VOr_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VOr_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Vector vxor instructions, funct6 = 0b001011
  void VXor_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VXor_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VXor_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VXor_vi. It has no immediate parameter but keeps the `vm`
  // parameter, so it can be masked.
  // NOTE(review): presumably the implementation supplies the immediate -1 - confirm.
  void VNot_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1100 
  // Vector vrgather instructions, funct6 = 0b001100
  // The immediate forms in the gather/slide families below take an unsigned `uint32_t uimm5`.
  void VRgather_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VRgather_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VRgather_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);

  // Vector vslideup instructions, funct6 = 0b001110
  // Only `_vx` and `_vi` forms are declared; no `_vv` form.
  void VSlideup_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VSlideup_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);

  // Vector vrgatherei16 instructions, funct6 = 0b001110
  // Only a `_vv` form is declared. The funct6 value is the same as the one listed for
  // vslideup above.
  // NOTE(review): presumably the two are told apart by the operand category of the encoding
  // (vector-vector versus scalar/immediate) - confirm in the implementation file.
  void VRgatherei16_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vslidedown instructions, funct6 = 0b001111
  // Only `_vx` and `_vi` forms are declared; no `_vv` form.
  void VSlidedown_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VSlidedown_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1116 
  // Vector vadc instructions, funct6 = 0b010000
  // None of the vadc/vmadc/vsbc/vmsbc declarations below takes a `vm` parameter; whether a
  // mask operand is involved is expressed by the `m` suffix in the function name instead.
  // NOTE(review): presumably the `m`-suffixed forms read the carry/borrow input from `V0`
  // and the forms without the suffix have no carry/borrow input - confirm.
  void VAdc_vvm(VRegister vd, VRegister vs2, VRegister vs1);
  void VAdc_vxm(VRegister vd, VRegister vs2, XRegister rs1);
  void VAdc_vim(VRegister vd, VRegister vs2, int32_t imm5);

  // Vector vmadc instructions with the `m` suffix (`_vvm`, `_vxm`, `_vim`), funct6 = 0b010001
  void VMadc_vvm(VRegister vd, VRegister vs2, VRegister vs1);
  void VMadc_vxm(VRegister vd, VRegister vs2, XRegister rs1);
  void VMadc_vim(VRegister vd, VRegister vs2, int32_t imm5);

  // Vector vmadc instructions without the `m` suffix (`_vv`, `_vx`, `_vi`), funct6 = 0b010001
  void VMadc_vv(VRegister vd, VRegister vs2, VRegister vs1);
  void VMadc_vx(VRegister vd, VRegister vs2, XRegister rs1);
  void VMadc_vi(VRegister vd, VRegister vs2, int32_t imm5);

  // Vector vsbc instructions, funct6 = 0b010010
  // No immediate form is declared for vsbc.
  void VSbc_vvm(VRegister vd, VRegister vs2, VRegister vs1);
  void VSbc_vxm(VRegister vd, VRegister vs2, XRegister rs1);

  // Vector vmsbc instructions, funct6 = 0b010011
  // Declared both with (`_vvm`, `_vxm`) and without (`_vv`, `_vx`) the `m` suffix; no
  // immediate form is declared.
  void VMsbc_vvm(VRegister vd, VRegister vs2, VRegister vs1);
  void VMsbc_vxm(VRegister vd, VRegister vs2, XRegister rs1);
  void VMsbc_vv(VRegister vd, VRegister vs2, VRegister vs1);
  void VMsbc_vx(VRegister vd, VRegister vs2, XRegister rs1);
1141 
  // Vector vmerge instructions, funct6 = 0b010111, vm = 0
  // The `vm` bit is fixed for this family (see above), so no `vm` parameter is declared.
  void VMerge_vvm(VRegister vd, VRegister vs2, VRegister vs1);
  void VMerge_vxm(VRegister vd, VRegister vs2, XRegister rs1);
  void VMerge_vim(VRegister vd, VRegister vs2, int32_t imm5);

  // Vector vmv instructions, funct6 = 0b010111, vm = 1, vs2 = v0
  // Same funct6 as vmerge. Both `vm` and `vs2` are fixed (see above), so these take only the
  // destination and a single source (`vs1`, `rs1` or a signed `imm5`).
  void VMv_vv(VRegister vd, VRegister vs1);
  void VMv_vx(VRegister vd, XRegister rs1);
  void VMv_vi(VRegister vd, int32_t imm5);
1151 
  // Vector vmseq instructions, funct6 = 0b011000
  // vmseq and vmsne are declared in `_vv`, `_vx` and `_vi` (signed `imm5`) forms.
  void VMseq_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMseq_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VMseq_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Vector vmsne instructions, funct6 = 0b011001
  void VMsne_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMsne_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VMsne_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Vector vmsltu instructions, funct6 = 0b011010
  // Only `_vv` and `_vx` are declared here; `VMsltu_vi()` is declared separately as a
  // pseudo-instruction over VMsleu_*.
  void VMsltu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMsltu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VMsltu_vv
  // NOTE(review): presumably implemented by swapping the two vector sources - confirm.
  void VMsgtu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vmslt instructions, funct6 = 0b011011
  // Only `_vv` and `_vx` are declared here; `VMslt_vi()` is declared separately as a
  // pseudo-instruction over VMsle_*.
  void VMslt_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMslt_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VMslt_vv
  // NOTE(review): presumably implemented by swapping the two vector sources - confirm.
  void VMsgt_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1175 
  // Vector vmsleu instructions, funct6 = 0b011100
  // Note that the immediate form takes a signed `int32_t imm5` even for this unsigned compare.
  void VMsleu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMsleu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VMsleu_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Pseudo-instructions over VMsleu_*
  // The immediate of `VMsltu_vi()` is named `aimm5` rather than `imm5`.
  // NOTE(review): presumably `aimm5` is adjusted by the implementation before encoding
  // (expressing `< aimm5` as `<= aimm5 - 1`), which would shift its valid range - confirm.
  void VMsgeu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMsltu_vi(VRegister vd, VRegister vs2, int32_t aimm5, VM vm = VM::kUnmasked);

  // Vector vmsle instructions, funct6 = 0b011101
  void VMsle_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMsle_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VMsle_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Pseudo-instructions over VMsle_*
  // The immediate of `VMslt_vi()` is named `aimm5` rather than `imm5`.
  // NOTE(review): presumably `aimm5` is adjusted by the implementation before encoding
  // (expressing `< aimm5` as `<= aimm5 - 1`), which would shift its valid range - confirm.
  void VMsge_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMslt_vi(VRegister vd, VRegister vs2, int32_t aimm5, VM vm = VM::kUnmasked);
1193 
  // Vector vmsgtu instructions, funct6 = 0b011110
  // Only `_vx` and `_vi` are declared here; `VMsgtu_vv()` is declared separately as a
  // pseudo-instruction over VMsltu_vv.
  void VMsgtu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VMsgtu_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VMsgtu_vi
  // The immediate is named `aimm5` rather than `imm5`.
  // NOTE(review): presumably `aimm5` is adjusted by the implementation before encoding
  // (expressing `>= aimm5` as `> aimm5 - 1`), which would shift its valid range - confirm.
  void VMsgeu_vi(VRegister vd, VRegister vs2, int32_t aimm5, VM vm = VM::kUnmasked);

  // Vector vmsgt instructions, funct6 = 0b011111
  // Only `_vx` and `_vi` are declared here; `VMsgt_vv()` is declared separately as a
  // pseudo-instruction over VMslt_vv.
  void VMsgt_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VMsgt_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VMsgt_vi
  // The immediate is named `aimm5` rather than `imm5`.
  // NOTE(review): presumably `aimm5` is adjusted by the implementation before encoding
  // (expressing `>= aimm5` as `> aimm5 - 1`), which would shift its valid range - confirm.
  void VMsge_vi(VRegister vd, VRegister vs2, int32_t aimm5, VM vm = VM::kUnmasked);
1207 
  // Vector vsaddu instructions, funct6 = 0b100000
  // The immediate form takes a signed `int32_t imm5`, also for this `u` variant.
  void VSaddu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VSaddu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VSaddu_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Vector vsadd instructions, funct6 = 0b100001
  void VSadd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VSadd_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VSadd_vi(VRegister vd, VRegister vs2, int32_t imm5, VM vm = VM::kUnmasked);

  // Vector vssubu instructions, funct6 = 0b100010
  // Unlike vsaddu/vsadd, no immediate form is declared for vssubu/vssub.
  void VSsubu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VSsubu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vssub instructions, funct6 = 0b100011
  void VSsub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VSsub_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1225 
  // Vector vsll instructions, funct6 = 0b100101
  // The immediate form takes an unsigned `uint32_t uimm5`.
  void VSll_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VSll_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VSll_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);

  // Vector vsmul instructions, funct6 = 0b100111
  // Only `_vv` and `_vx` forms are declared.
  void VSmul_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VSmul_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vmv<nr>r.v instructions, funct6 = 0b100111
  // Same funct6 as vsmul above. These take only `vd` and `vs2`; there is no `vm` parameter.
  // Note the `Vmv` capitalization of these names, which differs from `VMv_vv()` and friends.
  // NOTE(review): presumably <nr> is the number of registers moved - confirm.
  void Vmv1r_v(VRegister vd, VRegister vs2);
  void Vmv2r_v(VRegister vd, VRegister vs2);
  void Vmv4r_v(VRegister vd, VRegister vs2);
  void Vmv8r_v(VRegister vd, VRegister vs2);
1240 
  // Vector vsrl instructions, funct6 = 0b101000
  // All four right-shift families below are declared in `_vv`, `_vx` and `_vi` forms, with
  // the immediate passed as an unsigned `uint32_t uimm5`. `vm` defaults to `VM::kUnmasked`.
  void VSrl_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VSrl_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VSrl_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);

  // Vector vsra instructions, funct6 = 0b101001
  void VSra_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VSra_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VSra_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);

  // Vector vssrl instructions, funct6 = 0b101010
  void VSsrl_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VSsrl_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VSsrl_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);

  // Vector vssra instructions, funct6 = 0b101011
  void VSsra_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VSsra_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VSsra_vi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1260 
  // Vector vnsrl instructions, funct6 = 0b101100
  // The families below use `_wv`/`_wx`/`_wi` suffixes instead of `_vv`/`_vx`/`_vi`; the
  // parameter lists have the same shape, with an unsigned `uint32_t uimm5` immediate.
  // NOTE(review): presumably `w` marks `vs2` as the wide (2*SEW) source operand - confirm.
  void VNsrl_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VNsrl_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VNsrl_wi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VNsrl_wx
  // There is no `rs1` parameter.
  // NOTE(review): presumably the implementation passes the zero register as `rs1` - confirm.
  void VNcvt_x_x_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vnsra instructions, funct6 = 0b101101
  void VNsra_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VNsra_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VNsra_wi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);

  // Vector vnclipu instructions, funct6 = 0b101110
  void VNclipu_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VNclipu_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VNclipu_wi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);

  // Vector vnclip instructions, funct6 = 0b101111
  void VNclip_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VNclip_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
  void VNclip_wi(VRegister vd, VRegister vs2, uint32_t uimm5, VM vm = VM::kUnmasked);
1283 
  // Vector vwredsumu instructions, funct6 = 0b110000
  // All `_vs` declarations below take three vector registers (`vd`, `vs2`, `vs1`) and a `vm`
  // that defaults to `VM::kUnmasked`.
  void VWredsumu_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vwredsum instructions, funct6 = 0b110001
  void VWredsum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vredsum instructions, funct6 = 0b000000
  // Note that the funct6 numbering starts over at 0b000000 here.
  // NOTE(review): presumably the declarations from here on belong to a different operand
  // category (funct3) of the encoding than the ones above - confirm.
  void VRedsum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vredand instructions, funct6 = 0b000001
  void VRedand_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vredor instructions, funct6 = 0b000010
  void VRedor_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vredxor instructions, funct6 = 0b000011
  void VRedxor_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vredminu instructions, funct6 = 0b000100
  void VRedminu_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vredmin instructions, funct6 = 0b000101
  void VRedmin_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vredmaxu instructions, funct6 = 0b000110
  void VRedmaxu_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vredmax instructions, funct6 = 0b000111
  void VRedmax_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1313 
  // Vector vaaddu instructions, funct6 = 0b001000
  // The four families below are declared only in `_vv` and `_vx` forms; no immediate form
  // is declared. `vm` defaults to `VM::kUnmasked`.
  void VAaddu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VAaddu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vaadd instructions, funct6 = 0b001001
  void VAadd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VAadd_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vasubu instructions, funct6 = 0b001010
  void VAsubu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VAsubu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vasub instructions, funct6 = 0b001011
  void VAsub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VAsub_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1329 
  // Vector vslide1up instructions, funct6 = 0b001110
  // Only a `_vx` form (with `XRegister rs1`) is declared.
  void VSlide1up_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vslide1down instructions, funct6 = 0b001111
  // Only a `_vx` form (with `XRegister rs1`) is declared.
  void VSlide1down_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vcompress instructions, funct6 = 0b010111
  // No `vm` parameter is declared.
  // NOTE(review): presumably `vs1` is the mask register selecting the elements - confirm.
  void VCompress_vm(VRegister vd, VRegister vs2, VRegister vs1);
1338 
  // Vector vmandn instructions, funct6 = 0b011000
  // None of the `_mm` declarations below (nor the `_m` pseudo-instructions) takes a `vm`
  // parameter; all operands are vector registers.
  void VMandn_mm(VRegister vd, VRegister vs2, VRegister vs1);

  // Vector vmand instructions, funct6 = 0b011001
  void VMand_mm(VRegister vd, VRegister vs2, VRegister vs1);

  // Pseudo-instruction over VMand_mm
  // Takes a single source register.
  // NOTE(review): presumably `vs2` is passed as both sources of `VMand_mm()` - confirm.
  void VMmv_m(VRegister vd, VRegister vs2);

  // Vector vmor instructions, funct6 = 0b011010
  void VMor_mm(VRegister vd, VRegister vs2, VRegister vs1);

  // Vector vmxor instructions, funct6 = 0b011011
  void VMxor_mm(VRegister vd, VRegister vs2, VRegister vs1);

  // Pseudo-instruction over VMxor_mm
  // Takes only the destination register.
  // NOTE(review): presumably `vd` is passed for all three operands of `VMxor_mm()` - confirm.
  void VMclr_m(VRegister vd);

  // Vector vmorn instructions, funct6 = 0b011100
  void VMorn_mm(VRegister vd, VRegister vs2, VRegister vs1);

  // Vector vmnand instructions, funct6 = 0b011101
  void VMnand_mm(VRegister vd, VRegister vs2, VRegister vs1);

  // Pseudo-instruction over VMnand_mm
  // Takes a single source register.
  // NOTE(review): presumably `vs2` is passed as both sources of `VMnand_mm()` - confirm.
  void VMnot_m(VRegister vd, VRegister vs2);

  // Vector vmnor instructions, funct6 = 0b011110
  void VMnor_mm(VRegister vd, VRegister vs2, VRegister vs1);

  // Vector vmxnor instructions, funct6 = 0b011111
  void VMxnor_mm(VRegister vd, VRegister vs2, VRegister vs1);

  // Pseudo-instruction over VMxnor_mm
  // Takes only the destination register.
  // NOTE(review): presumably `vd` is passed for all three operands of `VMxnor_mm()` - confirm.
  void VMset_m(VRegister vd);
1374 
  // Vector vdivu instructions, funct6 = 0b100000
  // All divide, remainder and multiply families below are declared only in `_vv` and `_vx`
  // forms, with parameters in the order (`vd`, `vs2`, `vs1`/`rs1`) and `vm` defaulting to
  // `VM::kUnmasked`.
  void VDivu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VDivu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vdiv instructions, funct6 = 0b100001
  void VDiv_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VDiv_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vremu instructions, funct6 = 0b100010
  void VRemu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VRemu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vrem instructions, funct6 = 0b100011
  void VRem_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VRem_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vmulhu instructions, funct6 = 0b100100
  void VMulhu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMulhu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vmul instructions, funct6 = 0b100101
  void VMul_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMul_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vmulhsu instructions, funct6 = 0b100110
  void VMulhsu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMulhsu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vmulh instructions, funct6 = 0b100111
  void VMulh_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMulh_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1406 
  // Vector vmadd instructions, funct6 = 0b101001
  // Note the parameter order of the four families below: `vs1`/`rs1` comes BEFORE `vs2`,
  // i.e. (`vd`, `vs1`/`rs1`, `vs2`). In the `_vv` forms both sources are `VRegister`s, so
  // swapping them by mistake still compiles.
  void VMadd_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VMadd_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vnmsub instructions, funct6 = 0b101011
  void VNmsub_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VNmsub_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vmacc instructions, funct6 = 0b101101
  void VMacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VMacc_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vnmsac instructions, funct6 = 0b101111
  void VNmsac_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VNmsac_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1422 
  // Vector vwaddu instructions, funct6 = 0b110000
  // The widening add/subtract families below come in two flavours: `_vv`/`_vx` and, under
  // separate funct6 values, `_wv`/`_wx`. All use the order (`vd`, `vs2`, `vs1`/`rs1`).
  void VWaddu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWaddu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VWaddu_vx
  // The single source is named `vs` and there is no `rs1` parameter.
  // NOTE(review): presumably the implementation passes the zero register as `rs1` - confirm.
  void VWcvtu_x_x_v(VRegister vd, VRegister vs, VM vm = VM::kUnmasked);

  // Vector vwadd instructions, funct6 = 0b110001
  void VWadd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWadd_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VWadd_vx
  // The single source is named `vs` and there is no `rs1` parameter.
  // NOTE(review): presumably the implementation passes the zero register as `rs1` - confirm.
  void VWcvt_x_x_v(VRegister vd, VRegister vs, VM vm = VM::kUnmasked);

  // Vector vwsubu instructions, funct6 = 0b110010
  void VWsubu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWsubu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vwsub instructions, funct6 = 0b110011
  void VWsub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWsub_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vwaddu.w instructions, funct6 = 0b110100
  // NOTE(review): presumably in the `.w` forms `vs2` is already a wide (2*SEW) operand -
  // confirm.
  void VWaddu_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWaddu_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vwadd.w instructions, funct6 = 0b110101
  void VWadd_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWadd_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vwsubu.w instructions, funct6 = 0b110110
  void VWsubu_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWsubu_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vwsub.w instructions, funct6 = 0b110111
  void VWsub_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWsub_wx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);
1460 
  // Vector vwmulu instructions, funct6 = 0b111000
  // The vwmul* declarations use the order (`vd`, `vs2`, `vs1`/`rs1`).
  void VWmulu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWmulu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vwmulsu instructions, funct6 = 0b111010
  void VWmulsu_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWmulsu_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vwmul instructions, funct6 = 0b111011
  void VWmul_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VWmul_vx(VRegister vd, VRegister vs2, XRegister rs1, VM vm = VM::kUnmasked);

  // Vector vwmaccu instructions, funct6 = 0b111100
  // Note the parameter order of the vwmacc* declarations: `vs1`/`rs1` comes BEFORE `vs2`,
  // i.e. (`vd`, `vs1`/`rs1`, `vs2`), the reverse of the vwmul* declarations above.
  void VWmaccu_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VWmaccu_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vwmacc instructions, funct6 = 0b111101
  void VWmacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VWmacc_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vwmaccus instructions, funct6 = 0b111110
  // Only a `_vx` form is declared for vwmaccus.
  void VWmaccus_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vwmaccsu instructions, funct6 = 0b111111
  void VWmaccsu_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VWmaccsu_vx(VRegister vd, XRegister rs1, VRegister vs2, VM vm = VM::kUnmasked);
1487 
  // Vector vfadd instructions, funct6 = 0b000000
  // From here on the scalar forms are `_vf` and take an `FRegister fs1` instead of an
  // `XRegister rs1`. Note that the funct6 numbering starts over at 0b000000.
  // NOTE(review): presumably these use the floating-point operand categories (funct3) of the
  // encoding - confirm.
  void VFadd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFadd_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfredusum instructions, funct6 = 0b000001
  // The `_vs` declarations in this group take three vector registers and have no `_vf` form.
  void VFredusum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vfsub instructions, funct6 = 0b000010
  void VFsub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFsub_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfredosum instructions, funct6 = 0b000011
  void VFredosum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vfmin instructions, funct6 = 0b000100
  void VFmin_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFmin_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfredmin instructions, funct6 = 0b000101
  void VFredmin_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vfmax instructions, funct6 = 0b000110
  void VFmax_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFmax_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfredmax instructions, funct6 = 0b000111
  void VFredmax_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
1515 
  // Vector vfsgnj instructions, funct6 = 0b001000
  // Each vfsgnj* family is declared in `_vv` and `_vf` (`FRegister fs1`) forms.
  void VFsgnj_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFsgnj_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfsgnjn instructions, funct6 = 0b001001
  void VFsgnjn_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFsgnjn_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VFsgnjn_vv
  // Takes a single source `vs` and no `vm` parameter, so it cannot be masked by the caller.
  // NOTE(review): presumably `vs` is passed as both sources of `VFsgnjn_vv()` - confirm.
  void VFneg_v(VRegister vd, VRegister vs);

  // Vector vfsgnjx instructions, funct6 = 0b001010
  void VFsgnjx_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFsgnjx_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VFsgnjx_vv
  // Takes a single source `vs` and no `vm` parameter, so it cannot be masked by the caller.
  // NOTE(review): presumably `vs` is passed as both sources of `VFsgnjx_vv()` - confirm.
  void VFabs_v(VRegister vd, VRegister vs);
1533 
  // Vector vfslide1up instructions, funct6 = 0b001110
  // Only a `_vf` form (with `FRegister fs1`) is declared.
  void VFslide1up_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfslide1down instructions, funct6 = 0b001111
  // Only a `_vf` form (with `FRegister fs1`) is declared.
  void VFslide1down_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfmerge/vfmv instructions, funct6 = 0b010111
  // Neither declaration takes a `vm` parameter, and `VFmv_v_f()` has no `vs2` either.
  // NOTE(review): presumably, as with vmerge/vmv, the two share funct6 and the implementation
  // fixes the `vm` bit (and `vs2` for vfmv) itself - confirm.
  void VFmerge_vfm(VRegister vd, VRegister vs2, FRegister fs1);
  void VFmv_v_f(VRegister vd, FRegister fs1);
1543 
  // Vector vmfeq instructions, funct6 = 0b011000
  // vmfeq, vmfle, vmflt and vmfne are declared in `_vv` and `_vf` forms; vmfgt and vmfge
  // further down have only a `_vf` form of their own, their `_vv` counterparts being
  // pseudo-instructions.
  void VMfeq_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMfeq_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vmfle instructions, funct6 = 0b011001
  void VMfle_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMfle_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VMfle_vv
  // NOTE(review): presumably implemented by swapping the two vector sources - confirm.
  void VMfge_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vmflt instructions, funct6 = 0b011011
  void VMflt_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMflt_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Pseudo-instruction over VMflt_vv
  // NOTE(review): presumably implemented by swapping the two vector sources - confirm.
  void VMfgt_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vmfne instructions, funct6 = 0b011100
  void VMfne_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VMfne_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vmfgt instructions, funct6 = 0b011101
  // Only the `_vf` form; `VMfgt_vv()` is the pseudo-instruction over VMflt_vv above.
  void VMfgt_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vmfge instructions, funct6 = 0b011111
  // Only the `_vf` form; `VMfge_vv()` is the pseudo-instruction over VMfle_vv above.
  void VMfge_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1571 
  // Vector vfdiv instructions, funct6 = 0b100000
  void VFdiv_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFdiv_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfrdiv instructions, funct6 = 0b100001
  // Only a `_vf` form is declared.
  // NOTE(review): presumably the `r` (reverse) forms swap the roles of the vector and scalar
  // operands in the operation - confirm.
  void VFrdiv_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfmul instructions, funct6 = 0b100100
  void VFmul_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFmul_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfrsub instructions, funct6 = 0b100111
  // Only a `_vf` form is declared.
  void VFrsub_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);
1585 
  // Vector vfmadd instructions, funct6 = 0b101000
  // Note the parameter order of all eight families below: `vs1`/`fs1` comes BEFORE `vs2`,
  // i.e. (`vd`, `vs1`/`fs1`, `vs2`). In the `_vv` forms both sources are `VRegister`s, so
  // swapping them by mistake still compiles. `vm` defaults to `VM::kUnmasked`.
  void VFmadd_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFmadd_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfnmadd instructions, funct6 = 0b101001
  void VFnmadd_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFnmadd_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfmsub instructions, funct6 = 0b101010
  void VFmsub_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFmsub_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfnmsub instructions, funct6 = 0b101011
  void VFnmsub_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFnmsub_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfmacc instructions, funct6 = 0b101100
  void VFmacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFmacc_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfnmacc instructions, funct6 = 0b101101
  void VFnmacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFnmacc_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfmsac instructions, funct6 = 0b101110
  void VFmsac_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFmsac_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfnmsac instructions, funct6 = 0b101111
  void VFnmsac_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFnmsac_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1617 
  // Vector vfwadd instructions, funct6 = 0b110000
  // The vfwadd/vfwsub/vfwmul declarations and the `_vs` reductions in this group use the
  // order (`vd`, `vs2`, `vs1`/`fs1`); the vfw*mac* declarations at the end use
  // (`vd`, `vs1`/`fs1`, `vs2`) instead.
  void VFwadd_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFwadd_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfwredusum instructions, funct6 = 0b110001
  void VFwredusum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vfwsub instructions, funct6 = 0b110010
  void VFwsub_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFwsub_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfwredosum instructions, funct6 = 0b110011
  void VFwredosum_vs(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);

  // Vector vfwadd.w instructions, funct6 = 0b110100
  // NOTE(review): presumably in the `.w` forms `vs2` is already a wide (2*SEW) operand -
  // confirm.
  void VFwadd_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFwadd_wf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfwsub.w instructions, funct6 = 0b110110
  void VFwsub_wv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFwsub_wf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfwmul instructions, funct6 = 0b111000
  void VFwmul_vv(VRegister vd, VRegister vs2, VRegister vs1, VM vm = VM::kUnmasked);
  void VFwmul_vf(VRegister vd, VRegister vs2, FRegister fs1, VM vm = VM::kUnmasked);

  // Vector vfwmacc instructions, funct6 = 0b111100
  // From here to the end of this group, `vs1`/`fs1` comes BEFORE `vs2`.
  void VFwmacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFwmacc_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfwnmacc instructions, funct6 = 0b111101
  void VFwnmacc_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFwnmacc_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfwmsac instructions, funct6 = 0b111110
  void VFwmsac_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFwmsac_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);

  // Vector vfwnmsac instructions, funct6 = 0b111111
  void VFwnmsac_vv(VRegister vd, VRegister vs1, VRegister vs2, VM vm = VM::kUnmasked);
  void VFwnmsac_vf(VRegister vd, FRegister fs1, VRegister vs2, VM vm = VM::kUnmasked);
1659 
  // Vector VRXUNARY0 kind instructions, funct6 = 0b010000
  // Takes a vector destination and an `XRegister` source; no `vm` parameter.
  void VMv_s_x(VRegister vd, XRegister rs1);

  // Vector VWXUNARY0 kind instructions, funct6 = 0b010000
  // These have an `XRegister` as the first parameter (`rd`) and a vector source `vs2`.
  // `VMv_x_s()` has no `vm` parameter, while `VCpop_m()` and `VFirst_m()` accept one
  // (default `VM::kUnmasked`).
  // NOTE(review): the VRXUNARY0 and VWXUNARY0 kinds list the same funct6; presumably the
  // individual instructions are selected by a fixed register-field value - confirm.
  void VMv_x_s(XRegister rd, VRegister vs2);
  void VCpop_m(XRegister rd, VRegister vs2, VM vm = VM::kUnmasked);
  void VFirst_m(XRegister rd, VRegister vs2, VM vm = VM::kUnmasked);
1667 
1668   // Vector VXUNARY0 kind instructions, funct6 = 0b010010
1669   void VZext_vf8(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1670   void VSext_vf8(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1671   void VZext_vf4(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1672   void VSext_vf4(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1673   void VZext_vf2(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1674   void VSext_vf2(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1675 
1676   // Vector VRFUNARY0 kind instructions, funct6 = 0b010000
1677   void VFmv_s_f(VRegister vd, FRegister fs1);
1678 
1679   // Vector VWFUNARY0 kind instructions, funct6 = 0b010000
1680   void VFmv_f_s(FRegister fd, VRegister vs2);
1681 
1682   // Vector VFUNARY0 kind instructions, funct6 = 0b010010
1683   void VFcvt_xu_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1684   void VFcvt_x_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1685   void VFcvt_f_xu_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1686   void VFcvt_f_x_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1687   void VFcvt_rtz_xu_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1688   void VFcvt_rtz_x_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1689   void VFwcvt_xu_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1690   void VFwcvt_x_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1691   void VFwcvt_f_xu_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1692   void VFwcvt_f_x_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1693   void VFwcvt_f_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1694   void VFwcvt_rtz_xu_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1695   void VFwcvt_rtz_x_f_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1696   void VFncvt_xu_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1697   void VFncvt_x_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1698   void VFncvt_f_xu_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1699   void VFncvt_f_x_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1700   void VFncvt_f_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1701   void VFncvt_rod_f_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1702   void VFncvt_rtz_xu_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1703   void VFncvt_rtz_x_f_w(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1704 
1705   // Vector VFUNARY1 kind instructions, funct6 = 0b010011
1706   void VFsqrt_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1707   void VFrsqrt7_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1708   void VFrec7_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1709   void VFclass_v(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1710 
1711   // Vector VMUNARY0 kind instructions, funct6 = 0b010100
1712   void VMsbf_m(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1713   void VMsof_m(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1714   void VMsif_m(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1715   void VIota_m(VRegister vd, VRegister vs2, VM vm = VM::kUnmasked);
1716   void VId_v(VRegister vd, VM vm = VM::kUnmasked);
1717 
1718   ////////////////////////////// RISC-V Vector Instructions  END //////////////////////////////
1719 
1720   ////////////////////////////// RV64 MACRO Instructions  START ///////////////////////////////
1721   // These pseudo instructions are from "RISC-V Assembly Programmer's Manual".
1722 
1723   void Nop();
1724   void Li(XRegister rd, int64_t imm);
1725   void Mv(XRegister rd, XRegister rs);
1726   void Not(XRegister rd, XRegister rs);
1727   void Neg(XRegister rd, XRegister rs);
1728   void NegW(XRegister rd, XRegister rs);
1729   void SextB(XRegister rd, XRegister rs);
1730   void SextH(XRegister rd, XRegister rs);
1731   void SextW(XRegister rd, XRegister rs);
1732   void ZextB(XRegister rd, XRegister rs);
1733   void ZextH(XRegister rd, XRegister rs);
1734   void ZextW(XRegister rd, XRegister rs);
1735   void Seqz(XRegister rd, XRegister rs);
1736   void Snez(XRegister rd, XRegister rs);
1737   void Sltz(XRegister rd, XRegister rs);
1738   void Sgtz(XRegister rd, XRegister rs);
1739   void FMvS(FRegister rd, FRegister rs);
1740   void FAbsS(FRegister rd, FRegister rs);
1741   void FNegS(FRegister rd, FRegister rs);
1742   void FMvD(FRegister rd, FRegister rs);
1743   void FAbsD(FRegister rd, FRegister rs);
1744   void FNegD(FRegister rd, FRegister rs);
1745 
1746   // Branch pseudo instructions
1747   void Beqz(XRegister rs, int32_t offset);
1748   void Bnez(XRegister rs, int32_t offset);
1749   void Blez(XRegister rs, int32_t offset);
1750   void Bgez(XRegister rs, int32_t offset);
1751   void Bltz(XRegister rs, int32_t offset);
1752   void Bgtz(XRegister rs, int32_t offset);
1753   void Bgt(XRegister rs, XRegister rt, int32_t offset);
1754   void Ble(XRegister rs, XRegister rt, int32_t offset);
1755   void Bgtu(XRegister rs, XRegister rt, int32_t offset);
1756   void Bleu(XRegister rs, XRegister rt, int32_t offset);
1757 
1758   // Jump pseudo instructions
1759   void J(int32_t offset);
1760   void Jal(int32_t offset);
1761   void Jr(XRegister rs);
1762   void Jalr(XRegister rs);
1763   void Jalr(XRegister rd, XRegister rs);
1764   void Ret();
1765 
1766   // Pseudo instructions for accessing control and status registers
1767   void RdCycle(XRegister rd);
1768   void RdTime(XRegister rd);
1769   void RdInstret(XRegister rd);
1770   void Csrr(XRegister rd, uint32_t csr);
1771   void Csrw(uint32_t csr, XRegister rs);
1772   void Csrs(uint32_t csr, XRegister rs);
1773   void Csrc(uint32_t csr, XRegister rs);
1774   void Csrwi(uint32_t csr, uint32_t uimm5);
1775   void Csrsi(uint32_t csr, uint32_t uimm5);
1776   void Csrci(uint32_t csr, uint32_t uimm5);
1777 
1778   // Load/store macros for arbitrary 32-bit offsets.
1779   void Loadb(XRegister rd, XRegister rs1, int32_t offset);
1780   void Loadh(XRegister rd, XRegister rs1, int32_t offset);
1781   void Loadw(XRegister rd, XRegister rs1, int32_t offset);
1782   void Loadd(XRegister rd, XRegister rs1, int32_t offset);
1783   void Loadbu(XRegister rd, XRegister rs1, int32_t offset);
1784   void Loadhu(XRegister rd, XRegister rs1, int32_t offset);
1785   void Loadwu(XRegister rd, XRegister rs1, int32_t offset);
1786   void Storeb(XRegister rs2, XRegister rs1, int32_t offset);
1787   void Storeh(XRegister rs2, XRegister rs1, int32_t offset);
1788   void Storew(XRegister rs2, XRegister rs1, int32_t offset);
1789   void Stored(XRegister rs2, XRegister rs1, int32_t offset);
1790   void FLoadw(FRegister rd, XRegister rs1, int32_t offset);
1791   void FLoadd(FRegister rd, XRegister rs1, int32_t offset);
1792   void FStorew(FRegister rs2, XRegister rs1, int32_t offset);
1793   void FStored(FRegister rs2, XRegister rs1, int32_t offset);
1794 
1795   // Macros for loading constants.
1796   void LoadConst32(XRegister rd, int32_t value);
1797   void LoadConst64(XRegister rd, int64_t value);
1798 
1799   // Macros for adding constants.
1800   void AddConst32(XRegister rd, XRegister rs1, int32_t value);
1801   void AddConst64(XRegister rd, XRegister rs1, int64_t value);
1802 
1803   // Jumps and branches to a label.
1804   void Beqz(XRegister rs, Riscv64Label* label, bool is_bare = false);
1805   void Bnez(XRegister rs, Riscv64Label* label, bool is_bare = false);
1806   void Blez(XRegister rs, Riscv64Label* label, bool is_bare = false);
1807   void Bgez(XRegister rs, Riscv64Label* label, bool is_bare = false);
1808   void Bltz(XRegister rs, Riscv64Label* label, bool is_bare = false);
1809   void Bgtz(XRegister rs, Riscv64Label* label, bool is_bare = false);
1810   void Beq(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1811   void Bne(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1812   void Ble(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1813   void Bge(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1814   void Blt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1815   void Bgt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1816   void Bleu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1817   void Bgeu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1818   void Bltu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1819   void Bgtu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
1820   void Jal(XRegister rd, Riscv64Label* label, bool is_bare = false);
1821   void J(Riscv64Label* label, bool is_bare = false);
1822   void Jal(Riscv64Label* label, bool is_bare = false);
1823 
1824   // Literal load.
1825   void Loadw(XRegister rd, Literal* literal);
1826   void Loadwu(XRegister rd, Literal* literal);
1827   void Loadd(XRegister rd, Literal* literal);
1828   void FLoadw(FRegister rd, Literal* literal);
1829   void FLoadd(FRegister rd, Literal* literal);
1830 
1831   // Illegal instruction that triggers SIGILL.
1832   void Unimp();
1833 
1834   /////////////////////////////// RV64 MACRO Instructions END ///////////////////////////////
1835 
Bind(Label * label)1836   void Bind(Label* label) override { Bind(down_cast<Riscv64Label*>(label)); }
1837 
  // Generic `Jump(Label*)` override. It never emits code: the body only reports
  // `UNIMPLEMENTED(FATAL)` with a message telling callers not to use it for RISCV64.
  // The `label` argument is intentionally unused.
  void Jump([[maybe_unused]] Label* label) override {
    UNIMPLEMENTED(FATAL) << "Do not use Jump for RISCV64";
  }
1841 
Jump(Riscv64Label * label)1842   void Jump(Riscv64Label* label) {
1843     J(label);
1844   }
1845 
1846   void Bind(Riscv64Label* label);
1847 
1848   // Load label address using PC-relative loads.
1849   void LoadLabelAddress(XRegister rd, Riscv64Label* label);
1850 
1851   // Create a new literal with a given value.
1852   // NOTE:Use `Identity<>` to force the template parameter to be explicitly specified.
1853   template <typename T>
NewLiteral(typename Identity<T>::type value)1854   Literal* NewLiteral(typename Identity<T>::type value) {
1855     static_assert(std::is_integral<T>::value, "T must be an integral type.");
1856     return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value));
1857   }
1858 
1859   // Create a new literal with the given data.
1860   Literal* NewLiteral(size_t size, const uint8_t* data);
1861 
1862   // Create a jump table for the given labels that will be emitted when finalizing.
1863   // When the table is emitted, offsets will be relative to the location of the table.
1864   // The table location is determined by the location of its label (the label precedes
1865   // the table data) and should be loaded using LoadLabelAddress().
1866   JumpTable* CreateJumpTable(ArenaVector<Riscv64Label*>&& labels);
1867 
1868  public:
1869   // Emit slow paths queued during assembly, promote short branches to long if needed,
1870   // and emit branches.
1871   void FinalizeCode() override;
1872 
1873   template <typename Reg>
IsShortReg(Reg reg)1874   static inline bool IsShortReg(Reg reg) {
1875     static_assert(std::is_same_v<Reg, XRegister> || std::is_same_v<Reg, FRegister>);
1876     uint32_t uv = enum_cast<uint32_t>(reg) - 8u;
1877     return IsUint<3>(uv);
1878   }
1879 
1880   // Returns the current location of a label.
1881   //
1882   // This function must be used instead of `Riscv64Label::GetPosition()`
1883   // which returns assembler's internal data instead of an actual location.
1884   //
1885   // The location can change during branch fixup in `FinalizeCode()`. Before that,
1886   // the location is not final and therefore not very useful to external users,
1887   // so they should preferably retrieve the location only after `FinalizeCode()`.
1888   uint32_t GetLabelLocation(const Riscv64Label* label) const;
1889 
1890   // Get the final position of a label after local fixup based on the old position
1891   // recorded before FinalizeCode().
1892   uint32_t GetAdjustedPosition(uint32_t old_position);
1893 
1894  private:
ConvertExtensions(const Riscv64InstructionSetFeatures * instruction_set_features)1895   static uint32_t ConvertExtensions(
1896       const Riscv64InstructionSetFeatures* instruction_set_features) {
1897     // The `Riscv64InstructionSetFeatures` currently does not support "Zcb",
1898     // only the original "C" extension. For riscv64 that means "Zca" and "Zcd".
1899     constexpr Riscv64ExtensionMask kCompressedExtensionsMask =
1900         Riscv64ExtensionBit(Riscv64Extension::kZca) | Riscv64ExtensionBit(Riscv64Extension::kZcd);
1901     return
1902         (Riscv64ExtensionBit(Riscv64Extension::kLoadStore)) |
1903         (Riscv64ExtensionBit(Riscv64Extension::kZifencei)) |
1904         (Riscv64ExtensionBit(Riscv64Extension::kM)) |
1905         (Riscv64ExtensionBit(Riscv64Extension::kA)) |
1906         (Riscv64ExtensionBit(Riscv64Extension::kZicsr)) |
1907         (Riscv64ExtensionBit(Riscv64Extension::kF)) |
1908         (Riscv64ExtensionBit(Riscv64Extension::kD)) |
1909         (instruction_set_features->HasZba() ? Riscv64ExtensionBit(Riscv64Extension::kZba) : 0u) |
1910         (instruction_set_features->HasZbb() ? Riscv64ExtensionBit(Riscv64Extension::kZbb) : 0u) |
1911         (instruction_set_features->HasZbs() ? Riscv64ExtensionBit(Riscv64Extension::kZbs) : 0u) |
1912         (instruction_set_features->HasVector() ? Riscv64ExtensionBit(Riscv64Extension::kV) : 0u) |
1913         (instruction_set_features->HasCompressed() ? kCompressedExtensionsMask : 0u);
1914   }
1915 
  // `DCHECK`s that the extension `ext` is enabled (as reported by `IsExtensionEnabled()`).
  // The failure message contains the numeric value of `ext` and the `enabled_extensions_`
  // mask printed in hexadecimal.
  void AssertExtensionsEnabled(Riscv64Extension ext) {
    DCHECK(IsExtensionEnabled(ext))
        << "ext=" << enum_cast<>(ext) << " enabled=0x" << std::hex << enabled_extensions_;
  }
1920 
1921   template <typename... OtherExt>
AssertExtensionsEnabled(Riscv64Extension ext,OtherExt...other_ext)1922   void AssertExtensionsEnabled(Riscv64Extension ext, OtherExt... other_ext) {
1923     AssertExtensionsEnabled(ext);
1924     AssertExtensionsEnabled(other_ext...);
1925   }
1926 
  // Condition of a branch, stored in a single byte.
  // NOTE(review): judging by the names, the first six values are the signed comparisons,
  // the `U`-suffixed ones their unsigned counterparts, and `kUncond` marks unconditional
  // branches -- confirm against the implementation.
  enum BranchCondition : uint8_t {
    kCondEQ,
    kCondNE,
    kCondLT,
    kCondGE,
    kCondLE,
    kCondGT,
    kCondLTU,
    kCondGEU,
    kCondLEU,
    kCondGTU,
    kUncond,
  };
1940 
  // Describes one branch, call, label-address load or literal load: its location and
  // target in the assembler buffer, the registers and condition involved, and its current
  // and initial `Type`.
  //
  // Note that PC-relative literal loads are handled as pseudo branches because they need
  // to be emitted after branch relocation to use correct offsets.
  class Branch {
   public:
    // Kind of branch. The groups below are ordered from the shortest to the longest form,
    // followed by the label and literal pseudo branches.
    enum Type : uint8_t {
      // Compressed branches (can be promoted to longer).
      kCondCBranch,
      kUncondCBranch,
      // Compressed branches (can't be promoted to longer).
      kBareCondCBranch,
      kBareUncondCBranch,

      // Short branches (can be promoted to longer).
      kCondBranch,
      kUncondBranch,
      kCall,
      // Short branches (can't be promoted to longer).
      kBareCondBranch,
      kBareUncondBranch,
      kBareCall,

      // Medium branches (can be promoted to long).
      // Compressed version.
      kCondCBranch21,
      kCondBranch21,

      // Long branches.
      kLongCondBranch,
      kLongUncondBranch,
      kLongCall,

      // Label.
      kLabel,

      // Literals.
      kLiteral,
      kLiteralUnsigned,
      kLiteralLong,
      kLiteralFloat,
      kLiteralDouble,
    };

    // Bit sizes of offsets defined as enums to minimize chance of typos.
    enum OffsetBits {
      kOffset9 = 9,
      kOffset12 = 12,
      kOffset13 = 13,
      kOffset21 = 21,
      kOffset32 = 32,
    };

    static constexpr uint32_t kUnresolved = 0xffffffff;  // Unresolved target_
    static constexpr uint32_t kMaxBranchLength = 12;  // In bytes.

    // Static properties of a branch type.
    struct BranchInfo {
      // Branch length in bytes.
      uint32_t length;
      // The offset in bytes of the PC used in the (only) PC-relative instruction from
      // the start of the branch sequence. RISC-V always uses the address of the PC-relative
      // instruction as the PC, so this is essentially the offset of that instruction.
      uint32_t pc_offset;
      // How large (in bits) a PC-relative offset can be for a given type of branch.
      OffsetBits offset_size;
    };
    // Table of `BranchInfo` entries, indexed by `Type`.
    static const BranchInfo branch_info_[/* Type */];

    // Unconditional branch or call.
    Branch(
        uint32_t location, uint32_t target, XRegister rd, bool is_bare, bool compression_allowed);
    // Conditional branch.
    Branch(uint32_t location,
           uint32_t target,
           BranchCondition condition,
           XRegister lhs_reg,
           XRegister rhs_reg,
           bool is_bare,
           bool compression_allowed);
    // Label address or literal.
    Branch(uint32_t location, uint32_t target, XRegister rd, Type label_or_literal_type);
    Branch(uint32_t location, uint32_t target, FRegister rd, Type literal_type);

    // Some conditional branches with lhs = rhs are effectively NOPs, while some
    // others are effectively unconditional.
    static bool IsNop(BranchCondition condition, XRegister lhs, XRegister rhs);
    static bool IsUncond(BranchCondition condition, XRegister lhs, XRegister rhs);
    static bool IsCompressed(Type type);

    static BranchCondition OppositeCondition(BranchCondition cond);

    // Accessors. The `Old` variants presumably report the state recorded at construction
    // (see `old_location_` and `old_type_` below) -- confirm in the implementation.
    Type GetType() const;
    Type GetOldType() const;
    BranchCondition GetCondition() const;
    XRegister GetLeftRegister() const;
    XRegister GetRightRegister() const;
    XRegister GetNonZeroRegister() const;
    FRegister GetFRegister() const;
    uint32_t GetTarget() const;
    uint32_t GetLocation() const;
    uint32_t GetOldLocation() const;
    uint32_t GetLength() const;
    uint32_t GetOldLength() const;
    uint32_t GetEndLocation() const;
    uint32_t GetOldEndLocation() const;
    bool IsBare() const;
    bool IsResolved() const;

    // Id of the next branch in the list this branch is linked into (see `next_branch_id_`).
    uint32_t NextBranchId() const;

    // Checks if condition meets compression requirements.
    bool IsCompressableCondition() const;

    // Returns the bit size of the signed offset that the branch instruction can handle.
    OffsetBits GetOffsetSize() const;

    // Calculates the distance between two byte locations in the assembler buffer and
    // returns the number of bits needed to represent the distance as a signed integer.
    static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target);

    // Resolve a branch when the target is known.
    void Resolve(uint32_t target);

    // Relocate a branch by a given delta if needed due to expansion of this or another
    // branch at a given location by this delta (just changes location_ and target_).
    void Relocate(uint32_t expand_location, uint32_t delta);

    // If necessary, updates the type by promoting a short branch to a longer branch
    // based on the branch location and target. Returns the amount (in bytes) by
    // which the branch size has increased.
    uint32_t PromoteIfNeeded();

    // Returns the offset into assembler buffer that shall be used as the base PC for
    // offset calculation. RISC-V always uses the address of the PC-relative instruction
    // as the PC, so this is essentially the location of that instruction.
    uint32_t GetOffsetLocation() const;

    // Calculates and returns the offset ready for encoding in the branch instruction(s).
    int32_t GetOffset() const;

    // Link with the next branch.
    void LinkToList(uint32_t next_branch_id);

   private:
    // Completes branch construction by determining and recording its type.
    void InitializeType(Type initial_type);
    // Helper for the above.
    void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type, Type longest_type);
    // Overload of the helper that additionally takes a compressed type.
    void InitShortOrLong(OffsetBits ofs_size,
                         Type compressed_type,
                         Type short_type,
                         Type long_type,
                         Type longest_type);

    uint32_t old_location_;  // Offset into assembler buffer in bytes.
    uint32_t location_;      // Offset into assembler buffer in bytes.
    uint32_t target_;        // Offset into assembler buffer in bytes.

    XRegister lhs_reg_;          // Left-hand side register in conditional branches or
                                 // destination register in calls or literals.
    XRegister rhs_reg_;          // Right-hand side register in conditional branches.
    FRegister freg_;             // Destination register in FP literals.
    BranchCondition condition_;  // Condition for conditional branches.

    Type type_;      // Current type of the branch.
    Type old_type_;  // Initial type of the branch.

    // Id of the next branch bound to the same label in singly-linked zero-terminated list
    // NOTE: encoded the same way as a position in a linked Label (id + sizeof(void*))
    // Label itself is used to hold the 'head' of this list
    uint32_t next_branch_id_;

    // Set from the `compression_allowed` constructor argument; presumably controls whether
    // compressed branch types may be selected -- confirm in the implementation.
    bool compression_allowed_;
  };
2113 
2114   // Branch and literal fixup.
2115 
2116   void EmitBcond(BranchCondition cond, XRegister rs, XRegister rt, int32_t offset);
2117   void EmitBranch(Branch* branch);
2118   void EmitBranches();
2119   void EmitJumpTables();
2120   void EmitLiterals();
2121 
2122   void FinalizeLabeledBranch(Riscv64Label* label);
2123   void Bcond(Riscv64Label* label,
2124              bool is_bare,
2125              BranchCondition condition,
2126              XRegister lhs,
2127              XRegister rhs);
2128   void Buncond(Riscv64Label* label, XRegister rd, bool is_bare);
2129   template <typename XRegisterOrFRegister>
2130   void LoadLiteral(Literal* literal, XRegisterOrFRegister rd, Branch::Type literal_type);
2131 
2132   Branch* GetBranch(uint32_t branch_id);
2133   const Branch* GetBranch(uint32_t branch_id) const;
2134 
2135   void ReserveJumpTableSpace();
2136   void PromoteBranches();
2137   void PatchCFI();
2138 
2139   // Emit data (e.g. encoded instruction or immediate) to the instruction stream.
2140   template <typename T>
Emit(T value)2141   void Emit(T value) {
2142     static_assert(std::is_same_v<T, uint32_t> || std::is_same_v<T, uint16_t>,
2143                   "Only Integer types are allowed");
2144     if (overwriting_) {
2145       // Branches to labels are emitted into their placeholders here.
2146       buffer_.Store<T>(overwrite_location_, value);
2147       overwrite_location_ += sizeof(T);
2148     } else {
2149       // Other instructions are simply appended at the end here.
2150       AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2151       buffer_.Emit<T>(value);
2152     }
2153   }
2154 
Emit16(uint32_t value)2155   void Emit16(uint32_t value) { Emit(dchecked_integral_cast<uint16_t>(value)); }
Emit32(uint32_t value)2156   void Emit32(uint32_t value) { Emit(value); }
2157 
2158   // Adjust base register and offset if needed for load/store with a large offset.
2159   void AdjustBaseAndOffset(XRegister& base, int32_t& offset, ScratchRegisterScope& srs);
2160 
2161   // Helper templates for loads/stores with 32-bit offsets.
2162   template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)>
2163   void LoadFromOffset(XRegister rd, XRegister rs1, int32_t offset);
2164   template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)>
2165   void StoreToOffset(XRegister rs2, XRegister rs1, int32_t offset);
2166   template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)>
2167   void FLoadFromOffset(FRegister rd, XRegister rs1, int32_t offset);
2168   template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)>
2169   void FStoreToOffset(FRegister rs2, XRegister rs1, int32_t offset);
2170 
2171   // Implementation helper for `Li()`, `LoadConst32()` and `LoadConst64()`.
2172   void LoadImmediate(XRegister rd, int64_t imm, bool can_use_tmp);
2173 
2174   // RVV constants and helpers
2175 
  // Three-bit value passed as the `nf` argument of `EncodeRVVMemF7()`, which places it in
  // bits [6:4] of its result. Enumerator `kN` encodes `N - 1`.
  // NOTE(review): presumably the RVV "number of fields" of segment loads/stores -- confirm.
  enum class Nf : uint32_t {
    k1 = 0b000,
    k2 = 0b001,
    k3 = 0b010,
    k4 = 0b011,
    k5 = 0b100,
    k6 = 0b101,
    k7 = 0b110,
    k8 = 0b111,
  };
2186 
  // Three-bit operand-category encodings. The per-enumerator comments list the operand
  // kinds, the scalar source (if any) and the instruction type.
  // NOTE(review): presumably the funct3 field of vector arithmetic and configuration
  // instructions -- confirm against the emit helpers that consume it.
  enum class VAIEncoding : uint32_t {
                     // ----Operands---- | Type of Scalar                | Instruction type
    kOPIVV = 0b000,  // vector-vector    | --                            | R-type
    kOPFVV = 0b001,  // vector-vector    | --                            | R-type
    kOPMVV = 0b010,  // vector-vector    | --                            | R-type
    kOPIVI = 0b011,  // vector-immediate | imm[4:0]                      | R-type
    kOPIVX = 0b100,  // vector-scalar    | GPR x register rs1            | R-type
    kOPFVF = 0b101,  // vector-scalar    | FP f register rs1             | R-type
    kOPMVX = 0b110,  // vector-scalar    | GPR x register rs1            | R-type
    kOPCFG = 0b111,  // scalars-imms     | GPR x register rs1 & rs2/imm  | R/I-type
  };
2198 
  // Two-bit addressing mode passed as the `mop` argument of `EncodeRVVMemF7()`, which
  // places it in bits [2:1] of its result.
  enum class MemAddressMode : uint32_t {
    kUnitStride = 0b00,
    kIndexedUnordered = 0b01,
    kStrided = 0b10,
    kIndexedOrdered = 0b11,
  };
2205 
  // Three-bit width encodings.
  // NOTE(review): presumably the `width` field of RVV load/store instructions -- confirm
  // against the emit helpers that consume it.
  enum class VectorWidth : uint32_t {
    k8 = 0b000,
    k16 = 0b101,
    k32 = 0b110,
    k64 = 0b111,

    // Aliases that share the `k8` encoding (0b000).
    kMask = 0b000,
    kWholeR = 0b000,
  };
2215 
EncodeRVVMemF7(const Nf nf,const uint32_t mew,const MemAddressMode mop,const VM vm)2216   static constexpr uint32_t EncodeRVVMemF7(const Nf nf,
2217                                            const uint32_t mew,
2218                                            const MemAddressMode mop,
2219                                            const VM vm) {
2220     DCHECK(IsUint<3>(enum_cast<uint32_t>(nf)));
2221     DCHECK(IsUint<1>(mew));
2222     DCHECK(IsUint<2>(enum_cast<uint32_t>(mop)));
2223     DCHECK(IsUint<1>(enum_cast<uint32_t>(vm)));
2224 
2225     return enum_cast<uint32_t>(nf) << 4 | mew << 3 | enum_cast<uint32_t>(mop) << 1 |
2226            enum_cast<uint32_t>(vm);
2227   }
2228 
EncodeRVVF7(const uint32_t funct6,const VM vm)2229   static constexpr uint32_t EncodeRVVF7(const uint32_t funct6, const VM vm) {
2230     DCHECK(IsUint<6>(funct6));
2231     return funct6 << 1 | enum_cast<uint32_t>(vm);
2232   }
2233 
2234   template <unsigned kWidth>
EncodeIntWidth(const int32_t imm)2235   static constexpr uint32_t EncodeIntWidth(const int32_t imm) {
2236     DCHECK(IsInt<kWidth>(imm));
2237     return static_cast<uint32_t>(imm) & MaskLeastSignificant<uint32_t>(kWidth);
2238   }
2239 
EncodeInt5(const int32_t imm)2240   static constexpr uint32_t EncodeInt5(const int32_t imm) { return EncodeIntWidth<5>(imm); }
EncodeInt6(const int32_t imm)2241   static constexpr uint32_t EncodeInt6(const int32_t imm) { return EncodeIntWidth<6>(imm); }
2242 
2243   template <typename Reg>
EncodeShortReg(const Reg reg)2244   static constexpr uint32_t EncodeShortReg(const Reg reg) {
2245     DCHECK(IsShortReg(reg));
2246     return enum_cast<uint32_t>(reg) - 8u;
2247   }
2248 
2249   // Rearrange given offset in the way {offset[0] | offset[1]}
EncodeOffset0_1(int32_t offset)2250   static constexpr uint32_t EncodeOffset0_1(int32_t offset) {
2251     uint32_t u_offset = static_cast<uint32_t>(offset);
2252     DCHECK(IsUint<2>(u_offset));
2253 
2254     return u_offset >> 1 | (u_offset & 1u) << 1;
2255   }
2256 
2257   // Rearrange given offset, scaled by 4, in the way {offset[5:2] | offset[7:6]}
ExtractOffset52_76(int32_t offset)2258   static constexpr uint32_t ExtractOffset52_76(int32_t offset) {
2259     DCHECK(IsAligned<4>(offset)) << "Offset should be scalable by 4";
2260 
2261     uint32_t u_offset = static_cast<uint32_t>(offset);
2262     DCHECK(IsUint<6 + 2>(u_offset));
2263 
2264     uint32_t imm_52 = BitFieldExtract(u_offset, 2, 4);
2265     uint32_t imm_76 = BitFieldExtract(u_offset, 6, 2);
2266 
2267     return BitFieldInsert(imm_76, imm_52, 2, 4);
2268   }
2269 
2270   // Rearrange given offset, scaled by 8, in the way {offset[5:3] | offset[8:6]}
ExtractOffset53_86(int32_t offset)2271   static constexpr uint32_t ExtractOffset53_86(int32_t offset) {
2272     DCHECK(IsAligned<8>(offset)) << "Offset should be scalable by 8";
2273 
2274     uint32_t u_offset = static_cast<uint32_t>(offset);
2275     DCHECK(IsUint<6 + 3>(u_offset));
2276 
2277     uint32_t imm_53 = BitFieldExtract(u_offset, 3, 3);
2278     uint32_t imm_86 = BitFieldExtract(u_offset, 6, 3);
2279 
2280     return BitFieldInsert(imm_86, imm_53, 3, 3);
2281   }
2282 
2283   // Rearrange given offset, scaled by 4, in the way {offset[5:2] | offset[6]}
ExtractOffset52_6(int32_t offset)2284   static constexpr uint32_t ExtractOffset52_6(int32_t offset) {
2285     DCHECK(IsAligned<4>(offset)) << "Offset should be scalable by 4";
2286 
2287     uint32_t u_offset = static_cast<uint32_t>(offset);
2288     DCHECK(IsUint<5 + 2>(u_offset));
2289 
2290     uint32_t imm_52 = BitFieldExtract(u_offset, 2, 4);
2291     uint32_t imm_6  = BitFieldExtract(u_offset, 6, 1);
2292 
2293     return BitFieldInsert(imm_6, imm_52, 1, 4);
2294   }
2295 
2296   // Rearrange given offset, scaled by 8, in the way {offset[5:3], offset[7:6]}
ExtractOffset53_76(int32_t offset)2297   static constexpr uint32_t ExtractOffset53_76(int32_t offset) {
2298     DCHECK(IsAligned<8>(offset)) << "Offset should be scalable by 4";
2299 
2300     uint32_t u_offset = static_cast<uint32_t>(offset);
2301     DCHECK(IsUint<5 + 3>(u_offset));
2302 
2303     uint32_t imm_53 = BitFieldExtract(u_offset, 3, 3);
2304     uint32_t imm_76 = BitFieldExtract(u_offset, 6, 2);
2305 
2306     return BitFieldInsert(imm_76, imm_53, 2, 3);
2307   }
2308 
IsImmCLuiEncodable(uint32_t uimm)2309   static constexpr bool IsImmCLuiEncodable(uint32_t uimm) {
2310     // Instruction c.lui is odd and its immediate value is a bit tricky
2311     // Its value is not a full 32 bits value, but its bits [31:12]
2312     // (where the bit 17 marks the sign bit) shifted towards the bottom i.e. bits [19:0]
2313     // are the meaningful ones. Since that we want a signed non-zero 6-bit immediate to
2314     // keep values in the range [0, 0x1f], and the range [0xfffe0, 0xfffff] for negative values
2315     // since the sign bit was bit 17 (which is now bit 5 and replicated in the higher bits too)
2316     // Also encoding with immediate = 0 is reserved
2317     // For more details please see 16.5 chapter is the specification
2318 
2319     return uimm != 0u && (IsUint<5>(uimm) || IsUint<5>(uimm - 0xfffe0u));
2320   }
2321 
2322   // Emit helpers.
2323 
2324   // I-type instruction:
2325   //
2326   //    31                   20 19     15 14 12 11      7 6           0
2327   //   -----------------------------------------------------------------
2328   //   [ . . . . . . . . . . . | . . . . | . . | . . . . | . . . . . . ]
2329   //   [        imm11:0            rs1   funct3     rd        opcode   ]
2330   //   -----------------------------------------------------------------
2331   template <typename Reg1, typename Reg2>
EmitI(int32_t imm12,Reg1 rs1,uint32_t funct3,Reg2 rd,uint32_t opcode)2332   void EmitI(int32_t imm12, Reg1 rs1, uint32_t funct3, Reg2 rd, uint32_t opcode) {
2333     DCHECK(IsInt<12>(imm12)) << imm12;
2334     DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
2335     DCHECK(IsUint<3>(funct3));
2336     DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
2337     DCHECK(IsUint<7>(opcode));
2338     uint32_t encoding = static_cast<uint32_t>(imm12) << 20 | static_cast<uint32_t>(rs1) << 15 |
2339                         funct3 << 12 | static_cast<uint32_t>(rd) << 7 | opcode;
2340     Emit32(encoding);
2341   }
2342 
2343   // R-type instruction:
2344   //
2345   //    31         25 24     20 19     15 14 12 11      7 6           0
2346   //   -----------------------------------------------------------------
2347   //   [ . . . . . . | . . . . | . . . . | . . | . . . . | . . . . . . ]
2348   //   [   funct7        rs2       rs1   funct3     rd        opcode   ]
2349   //   -----------------------------------------------------------------
2350   template <typename Reg1, typename Reg2, typename Reg3>
EmitR(uint32_t funct7,Reg1 rs2,Reg2 rs1,uint32_t funct3,Reg3 rd,uint32_t opcode)2351   void EmitR(uint32_t funct7, Reg1 rs2, Reg2 rs1, uint32_t funct3, Reg3 rd, uint32_t opcode) {
2352     DCHECK(IsUint<7>(funct7));
2353     DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
2354     DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
2355     DCHECK(IsUint<3>(funct3));
2356     DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
2357     DCHECK(IsUint<7>(opcode));
2358     uint32_t encoding = funct7 << 25 | static_cast<uint32_t>(rs2) << 20 |
2359                         static_cast<uint32_t>(rs1) << 15 | funct3 << 12 |
2360                         static_cast<uint32_t>(rd) << 7 | opcode;
2361     Emit32(encoding);
2362   }
2363 
2364   // R-type instruction variant for floating-point fused multiply-add/sub (F[N]MADD/ F[N]MSUB):
2365   //
2366   //    31     27  25 24     20 19     15 14 12 11      7 6           0
2367   //   -----------------------------------------------------------------
2368   //   [ . . . . | . | . . . . | . . . . | . . | . . . . | . . . . . . ]
2369   //   [  rs3     fmt    rs2       rs1   funct3     rd        opcode   ]
2370   //   -----------------------------------------------------------------
2371   template <typename Reg1, typename Reg2, typename Reg3, typename Reg4>
EmitR4(Reg1 rs3,uint32_t fmt,Reg2 rs2,Reg3 rs1,uint32_t funct3,Reg4 rd,uint32_t opcode)2372   void EmitR4(
2373       Reg1 rs3, uint32_t fmt, Reg2 rs2, Reg3 rs1, uint32_t funct3, Reg4 rd, uint32_t opcode) {
2374     DCHECK(IsUint<5>(static_cast<uint32_t>(rs3)));
2375     DCHECK(IsUint<2>(fmt));
2376     DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
2377     DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
2378     DCHECK(IsUint<3>(funct3));
2379     DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
2380     DCHECK(IsUint<7>(opcode));
2381     uint32_t encoding = static_cast<uint32_t>(rs3) << 27 | static_cast<uint32_t>(fmt) << 25 |
2382                         static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 |
2383                         static_cast<uint32_t>(funct3) << 12 | static_cast<uint32_t>(rd) << 7 |
2384                         opcode;
2385     Emit32(encoding);
2386   }
2387 
2388   // S-type instruction:
2389   //
2390   //    31         25 24     20 19     15 14 12 11      7 6           0
2391   //   -----------------------------------------------------------------
2392   //   [ . . . . . . | . . . . | . . . . | . . | . . . . | . . . . . . ]
2393   //   [   imm11:5       rs2       rs1   funct3   imm4:0      opcode   ]
2394   //   -----------------------------------------------------------------
2395   template <typename Reg1, typename Reg2>
EmitS(int32_t imm12,Reg1 rs2,Reg2 rs1,uint32_t funct3,uint32_t opcode)2396   void EmitS(int32_t imm12, Reg1 rs2, Reg2 rs1, uint32_t funct3, uint32_t opcode) {
2397     DCHECK(IsInt<12>(imm12)) << imm12;
2398     DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
2399     DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
2400     DCHECK(IsUint<3>(funct3));
2401     DCHECK(IsUint<7>(opcode));
2402     uint32_t encoding = (static_cast<uint32_t>(imm12) & 0xFE0) << 20 |
2403                         static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 |
2404                         static_cast<uint32_t>(funct3) << 12 |
2405                         (static_cast<uint32_t>(imm12) & 0x1F) << 7 | opcode;
2406     Emit32(encoding);
2407   }
2408 
2409   // I-type instruction variant for shifts (SLLI / SRLI / SRAI):
2410   //
2411   //    31       26 25       20 19     15 14 12 11      7 6           0
2412   //   -----------------------------------------------------------------
2413   //   [ . . . . . | . . . . . | . . . . | . . | . . . . | . . . . . . ]
2414   //   [  imm11:6  imm5:0(shamt)   rs1   funct3     rd        opcode   ]
2415   //   -----------------------------------------------------------------
EmitI6(uint32_t funct6,uint32_t imm6,XRegister rs1,uint32_t funct3,XRegister rd,uint32_t opcode)2416   void EmitI6(uint32_t funct6,
2417               uint32_t imm6,
2418               XRegister rs1,
2419               uint32_t funct3,
2420               XRegister rd,
2421               uint32_t opcode) {
2422     DCHECK(IsUint<6>(funct6));
2423     DCHECK(IsUint<6>(imm6)) << imm6;
2424     DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
2425     DCHECK(IsUint<3>(funct3));
2426     DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
2427     DCHECK(IsUint<7>(opcode));
2428     uint32_t encoding = funct6 << 26 | static_cast<uint32_t>(imm6) << 20 |
2429                         static_cast<uint32_t>(rs1) << 15 | funct3 << 12 |
2430                         static_cast<uint32_t>(rd) << 7 | opcode;
2431     Emit32(encoding);
2432   }
2433 
2434   // B-type instruction:
2435   //
2436   //   31 30       25 24     20 19     15 14 12 11    8 7 6           0
2437   //   -----------------------------------------------------------------
2438   //   [ | . . . . . | . . . . | . . . . | . . | . . . | | . . . . . . ]
2439   //  imm12 imm11:5      rs2       rs1   funct3 imm4:1 imm11  opcode   ]
2440   //   -----------------------------------------------------------------
EmitB(int32_t offset,XRegister rs2,XRegister rs1,uint32_t funct3,uint32_t opcode)2441   void EmitB(int32_t offset, XRegister rs2, XRegister rs1, uint32_t funct3, uint32_t opcode) {
2442     DCHECK_ALIGNED(offset, 2);
2443     DCHECK(IsInt<13>(offset)) << offset;
2444     DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
2445     DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
2446     DCHECK(IsUint<3>(funct3));
2447     DCHECK(IsUint<7>(opcode));
2448     uint32_t imm12 = (static_cast<uint32_t>(offset) >> 1) & 0xfffu;
2449     uint32_t encoding = (imm12 & 0x800u) << (31 - 11) | (imm12 & 0x03f0u) << (25 - 4) |
2450                         static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 |
2451                         static_cast<uint32_t>(funct3) << 12 |
2452                         (imm12 & 0xfu) << 8 | (imm12 & 0x400u) >> (10 - 7) | opcode;
2453     Emit32(encoding);
2454   }
2455 
2456   // U-type instruction:
2457   //
2458   //    31                                   12 11      7 6           0
2459   //   -----------------------------------------------------------------
2460   //   [ . . . . . . . . . . . . . . . . . . . | . . . . | . . . . . . ]
2461   //   [                imm31:12                    rd        opcode   ]
2462   //   -----------------------------------------------------------------
EmitU(uint32_t imm20,XRegister rd,uint32_t opcode)2463   void EmitU(uint32_t imm20, XRegister rd, uint32_t opcode) {
2464     CHECK(IsUint<20>(imm20)) << imm20;
2465     DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
2466     DCHECK(IsUint<7>(opcode));
2467     uint32_t encoding = imm20 << 12 | static_cast<uint32_t>(rd) << 7 | opcode;
2468     Emit32(encoding);
2469   }
2470 
2471   // J-type instruction:
2472   //
2473   //   31 30               21   19           12 11      7 6           0
2474   //   -----------------------------------------------------------------
2475   //   [ | . . . . . . . . . | | . . . . . . . | . . . . | . . . . . . ]
2476   //  imm20    imm10:1      imm11   imm19:12        rd        opcode   ]
2477   //   -----------------------------------------------------------------
EmitJ(int32_t offset,XRegister rd,uint32_t opcode)2478   void EmitJ(int32_t offset, XRegister rd, uint32_t opcode) {
2479     DCHECK_ALIGNED(offset, 2);
2480     CHECK(IsInt<21>(offset)) << offset;
2481     DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
2482     DCHECK(IsUint<7>(opcode));
2483     uint32_t imm20 = (static_cast<uint32_t>(offset) >> 1) & 0xfffffu;
2484     uint32_t encoding = (imm20 & 0x80000u) << (31 - 19) | (imm20 & 0x03ffu) << 21 |
2485                         (imm20 & 0x400u) << (20 - 10) | (imm20 & 0x7f800u) << (12 - 11) |
2486                         static_cast<uint32_t>(rd) << 7 | opcode;
2487     Emit32(encoding);
2488   }
2489 
2490   // Compressed Instruction Encodings
2491 
2492   // CR-type instruction:
2493   //
2494   //   15    12 11      7 6       2 1 0
2495   //   ---------------------------------
2496   //   [ . . . | . . . . | . . . . | . ]
2497   //   [ func4   rd/rs1      rs2    op ]
2498   //   ---------------------------------
2499   //
EmitCR(uint32_t funct4,XRegister rd_rs1,XRegister rs2,uint32_t opcode)2500   void EmitCR(uint32_t funct4, XRegister rd_rs1, XRegister rs2, uint32_t opcode) {
2501     DCHECK(IsUint<4>(funct4));
2502     DCHECK(IsUint<5>(static_cast<uint32_t>(rd_rs1)));
2503     DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
2504     DCHECK(IsUint<2>(opcode));
2505 
2506     uint32_t encoding = funct4 << 12 | static_cast<uint32_t>(rd_rs1) << 7 |
2507                         static_cast<uint32_t>(rs2) << 2 | opcode;
2508     Emit16(encoding);
2509   }
2510 
2511   // CI-type instruction:
2512   //
2513   //   15  13   11      7 6       2 1 0
2514   //   ---------------------------------
2515   //   [ . . | | . . . . | . . . . | . ]
2516   //   [func3 imm rd/rs1     imm    op ]
2517   //   ---------------------------------
2518   //
2519   template <typename Reg>
EmitCI(uint32_t funct3,Reg rd_rs1,uint32_t imm6,uint32_t opcode)2520   void EmitCI(uint32_t funct3, Reg rd_rs1, uint32_t imm6, uint32_t opcode) {
2521     DCHECK(IsUint<3>(funct3));
2522     DCHECK(IsUint<5>(static_cast<uint32_t>(rd_rs1)));
2523     DCHECK(IsUint<6>(imm6));
2524     DCHECK(IsUint<2>(opcode));
2525 
2526     uint32_t immH1 = BitFieldExtract(imm6, 5, 1);
2527     uint32_t immL5 = BitFieldExtract(imm6, 0, 5);
2528 
2529     uint32_t encoding =
2530         funct3 << 13 | immH1 << 12 | static_cast<uint32_t>(rd_rs1) << 7 | immL5 << 2 | opcode;
2531     Emit16(encoding);
2532   }
2533 
2534   // CSS-type instruction:
2535   //
2536   //   15  13 12        7 6       2 1 0
2537   //   ---------------------------------
2538   //   [ . . | . . . . . | . . . . | . ]
2539   //   [func3     imm6      rs2     op ]
2540   //   ---------------------------------
2541   //
2542   template <typename Reg>
EmitCSS(uint32_t funct3,uint32_t offset6,Reg rs2,uint32_t opcode)2543   void EmitCSS(uint32_t funct3, uint32_t offset6, Reg rs2, uint32_t opcode) {
2544     DCHECK(IsUint<3>(funct3));
2545     DCHECK(IsUint<6>(offset6));
2546     DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
2547     DCHECK(IsUint<2>(opcode));
2548 
2549     uint32_t encoding = funct3 << 13 | offset6 << 7 | static_cast<uint32_t>(rs2) << 2 | opcode;
2550     Emit16(encoding);
2551   }
2552 
2553   // CIW-type instruction:
2554   //
2555   //   15  13 12            5 4   2 1 0
2556   //   ---------------------------------
2557   //   [ . . | . . . . . . . | . . | . ]
2558   //   [func3     imm8         rd'  op ]
2559   //   ---------------------------------
2560   //
EmitCIW(uint32_t funct3,uint32_t imm8,XRegister rd_s,uint32_t opcode)2561   void EmitCIW(uint32_t funct3, uint32_t imm8, XRegister rd_s, uint32_t opcode) {
2562     DCHECK(IsUint<3>(funct3));
2563     DCHECK(IsUint<8>(imm8));
2564     DCHECK(IsShortReg(rd_s)) << rd_s;
2565     DCHECK(IsUint<2>(opcode));
2566 
2567     uint32_t encoding = funct3 << 13 | imm8 << 5 | EncodeShortReg(rd_s) << 2 | opcode;
2568     Emit16(encoding);
2569   }
2570 
2571   // CL/S-type instruction:
2572   //
2573   //   15  13 12  10 9  7 6 5 4   2 1 0
2574   //   ---------------------------------
2575   //   [ . . | . . | . . | . | . . | . ]
2576   //   [func3  imm   rs1' imm rds2' op ]
2577   //   ---------------------------------
2578   //
2579   template <typename Reg>
EmitCM(uint32_t funct3,uint32_t imm5,XRegister rs1_s,Reg rd_rs2_s,uint32_t opcode)2580   void EmitCM(uint32_t funct3, uint32_t imm5, XRegister rs1_s, Reg rd_rs2_s, uint32_t opcode) {
2581     DCHECK(IsUint<3>(funct3));
2582     DCHECK(IsUint<5>(imm5));
2583     DCHECK(IsShortReg(rs1_s)) << rs1_s;
2584     DCHECK(IsShortReg(rd_rs2_s)) << rd_rs2_s;
2585     DCHECK(IsUint<2>(opcode));
2586 
2587     uint32_t immH3 = BitFieldExtract(imm5, 2, 3);
2588     uint32_t immL2 = BitFieldExtract(imm5, 0, 2);
2589 
2590     uint32_t encoding = funct3 << 13 | immH3 << 10 | EncodeShortReg(rs1_s) << 7 | immL2 << 5 |
2591                         EncodeShortReg(rd_rs2_s) << 2 | opcode;
2592     Emit16(encoding);
2593   }
2594 
2595   // CA-type instruction:
2596   //
2597   //   15         10 9  7 6 5 4   2 1 0
2598   //   ---------------------------------
2599   //   [ . . . . . | . . | . | . . | . ]
2600   //   [    funct6 rds1' funct2 rs2' op]
2601   //   ---------------------------------
2602   //
EmitCA(uint32_t funct6,XRegister rd_rs1_s,uint32_t funct2,uint32_t rs2_v,uint32_t opcode)2603   void EmitCA(
2604       uint32_t funct6, XRegister rd_rs1_s, uint32_t funct2, uint32_t rs2_v, uint32_t opcode) {
2605     DCHECK(IsUint<6>(funct6));
2606     DCHECK(IsShortReg(rd_rs1_s)) << rd_rs1_s;
2607     DCHECK(IsUint<2>(funct2));
2608     DCHECK(IsUint<3>(rs2_v));
2609     DCHECK(IsUint<2>(opcode));
2610 
2611     uint32_t encoding =
2612         funct6 << 10 | EncodeShortReg(rd_rs1_s) << 7 | funct2 << 5  | rs2_v << 2 | opcode;
2613     Emit16(encoding);
2614   }
2615 
EmitCAReg(uint32_t funct6,XRegister rd_rs1_s,uint32_t funct2,XRegister rs2_s,uint32_t opcode)2616   void EmitCAReg(
2617       uint32_t funct6, XRegister rd_rs1_s, uint32_t funct2, XRegister rs2_s, uint32_t opcode) {
2618     DCHECK(IsShortReg(rs2_s)) << rs2_s;
2619     EmitCA(funct6, rd_rs1_s, funct2, EncodeShortReg(rs2_s), opcode);
2620   }
2621 
EmitCAImm(uint32_t funct6,XRegister rd_rs1_s,uint32_t funct2,uint32_t funct3,uint32_t opcode)2622   void EmitCAImm(
2623       uint32_t funct6, XRegister rd_rs1_s, uint32_t funct2, uint32_t funct3, uint32_t opcode) {
2624     EmitCA(funct6, rd_rs1_s, funct2, funct3, opcode);
2625   }
2626 
2627   // CB-type instruction:
2628   //
2629   //   15  13 12  10 9  7 6       2 1 0
2630   //   ---------------------------------
2631   //   [ . . | . . | . . | . . . . | . ]
2632   //   [func3 offset rs1'   offset  op ]
2633   //   ---------------------------------
2634   //
EmitCB(uint32_t funct3,int32_t offset8,XRegister rd_rs1_s,uint32_t opcode)2635   void EmitCB(uint32_t funct3, int32_t offset8, XRegister rd_rs1_s, uint32_t opcode) {
2636     DCHECK(IsUint<3>(funct3));
2637     DCHECK(IsUint<8>(offset8));
2638     DCHECK(IsShortReg(rd_rs1_s)) << rd_rs1_s;
2639     DCHECK(IsUint<2>(opcode));
2640 
2641     uint32_t offsetH3 = BitFieldExtract<uint32_t>(offset8, 5, 3);
2642     uint32_t offsetL5 = BitFieldExtract<uint32_t>(offset8, 0, 5);
2643 
2644     uint32_t encoding =
2645         funct3 << 13 | offsetH3 << 10 | EncodeShortReg(rd_rs1_s) << 7 | offsetL5 << 2 | opcode;
2646     Emit16(encoding);
2647   }
2648 
2649   // Wrappers for EmitCB with different imm bit permutation
EmitCBBranch(uint32_t funct3,int32_t offset,XRegister rs1_s,uint32_t opcode)2650   void EmitCBBranch(uint32_t funct3, int32_t offset, XRegister rs1_s, uint32_t opcode) {
2651     DCHECK(IsInt<9>(offset));
2652     DCHECK_ALIGNED(offset, 2);
2653 
2654     uint32_t u_offset = static_cast<uint32_t>(offset);
2655 
2656     // offset[8|4:3]
2657     uint32_t offsetH3 = (BitFieldExtract(u_offset, 8, 1) << 2) |
2658                          BitFieldExtract(u_offset, 3, 2);
2659     // offset[7:6|2:1|5]
2660     uint32_t offsetL5 = (BitFieldExtract(u_offset, 6, 2) << 3) |
2661                         (BitFieldExtract(u_offset, 1, 2) << 1) |
2662                          BitFieldExtract(u_offset, 5, 1);
2663 
2664     EmitCB(funct3, BitFieldInsert(offsetL5, offsetH3, 5, 3), rs1_s, opcode);
2665   }
2666 
EmitCBArithmetic(uint32_t funct3,uint32_t funct2,uint32_t imm,XRegister rd_s,uint32_t opcode)2667   void EmitCBArithmetic(
2668       uint32_t funct3, uint32_t funct2, uint32_t imm, XRegister rd_s, uint32_t opcode) {
2669     uint32_t imm_5 = BitFieldExtract(imm, 5, 1);
2670     uint32_t immH3 = BitFieldInsert(funct2, imm_5, 2, 1);
2671     uint32_t immL5 = BitFieldExtract(imm, 0, 5);
2672 
2673     EmitCB(funct3, BitFieldInsert(immL5, immH3, 5, 3), rd_s, opcode);
2674   }
2675 
2676   // CJ-type instruction:
2677   //
2678   //   15  13 12                  2 1 0
2679   //   ---------------------------------
2680   //   [ . . | . . . . . . . . . . | . ]
2681   //   [func3    jump target 11     op ]
2682   //   ---------------------------------
2683   //
EmitCJ(uint32_t funct3,int32_t offset,uint32_t opcode)2684   void EmitCJ(uint32_t funct3, int32_t offset, uint32_t opcode) {
2685     DCHECK_ALIGNED(offset, 2);
2686     DCHECK(IsInt<12>(offset)) << offset;
2687     DCHECK(IsUint<3>(funct3));
2688     DCHECK(IsUint<2>(opcode));
2689 
2690     uint32_t uoffset = static_cast<uint32_t>(offset);
2691     // offset[11|4|9:8|10|6|7|3:1|5]
2692     uint32_t jumpt = (BitFieldExtract(uoffset, 11, 1) << 10) |
2693                      (BitFieldExtract(uoffset, 4, 1) << 9)   |
2694                      (BitFieldExtract(uoffset, 8, 2) << 7)   |
2695                      (BitFieldExtract(uoffset, 10, 1) << 6)  |
2696                      (BitFieldExtract(uoffset, 6, 1) << 5)   |
2697                      (BitFieldExtract(uoffset, 7, 1) << 4)   |
2698                      (BitFieldExtract(uoffset, 1, 3) << 1)   |
2699                       BitFieldExtract(uoffset, 5, 1);
2700 
2701     DCHECK(IsUint<11>(jumpt));
2702 
2703     uint32_t encoding = funct3 << 13 | jumpt << 2 | opcode;
2704     Emit16(encoding);
2705   }
2706 
  // The `Branch` records kept by the assembler.
  ArenaVector<Branch> branches_;

  // For checking that we finalize the code only once.
  bool finalized_;

  // Whether appending instructions at the end of the buffer or overwriting the existing ones.
  bool overwriting_;
  // The current overwrite location.
  uint32_t overwrite_location_;

  // Use `std::deque<>` for literal labels to allow insertions at the end
  // without invalidating pointers and references to existing elements.
  ArenaDeque<Literal> literals_;
  ArenaDeque<Literal> long_literals_;  // 64-bit literals separated for alignment reasons.

  // Jump table list.
  ArenaDeque<JumpTable> jump_tables_;

  // Data for `GetAdjustedPosition()`, see the description there.
  uint32_t last_position_adjustment_;
  uint32_t last_old_position_;
  uint32_t last_branch_id_;

  // Mask of the extensions that are currently enabled. It is temporarily replaced and later
  // restored by `ScopedExtensionsOverride` and the classes derived from it.
  Riscv64ExtensionMask enabled_extensions_;
  // Bit masks of the core and FP registers currently available as scratch registers; bit N
  // stands for the register with number N. These are modified and restored through
  // `ScratchRegisterScope`.
  uint32_t available_scratch_core_registers_;
  uint32_t available_scratch_fp_registers_;

  // NOTE(review): presumably the width of an X register in bits (XLEN) - confirm at use sites.
  static constexpr uint32_t kXlen = 64;

  // These helper classes read and write the private state above directly.
  friend class ScopedExtensionsOverride;
  friend class ScratchRegisterScope;

  DISALLOW_COPY_AND_ASSIGN(Riscv64Assembler);
2740 };
2741 
2742 class ScopedExtensionsOverride {
2743  public:
ScopedExtensionsOverride(Riscv64Assembler * assembler,Riscv64ExtensionMask enabled_extensions)2744   ScopedExtensionsOverride(Riscv64Assembler* assembler, Riscv64ExtensionMask enabled_extensions)
2745       : assembler_(assembler),
2746         old_enabled_extensions_(assembler->enabled_extensions_) {
2747     assembler->enabled_extensions_ = enabled_extensions;
2748   }
2749 
~ScopedExtensionsOverride()2750   ~ScopedExtensionsOverride() {
2751     assembler_->enabled_extensions_ = old_enabled_extensions_;
2752   }
2753 
2754  protected:
GetEnabledExtensions(Riscv64Assembler * assembler)2755   static Riscv64ExtensionMask GetEnabledExtensions(Riscv64Assembler* assembler) {
2756     return assembler->enabled_extensions_;
2757   }
2758 
2759  private:
2760   Riscv64Assembler* const assembler_;
2761   const Riscv64ExtensionMask old_enabled_extensions_;
2762 };
2763 
2764 template <Riscv64ExtensionMask kMask>
2765 class ScopedExtensionsRestriction : public ScopedExtensionsOverride {
2766  public:
ScopedExtensionsRestriction(Riscv64Assembler * assembler)2767   explicit ScopedExtensionsRestriction(Riscv64Assembler* assembler)
2768       : ScopedExtensionsOverride(assembler, GetEnabledExtensions(assembler) & kMask) {}
2769 };
2770 
2771 template <Riscv64ExtensionMask kMask>
2772 class ScopedExtensionsInclusion : public ScopedExtensionsOverride {
2773  public:
ScopedExtensionsInclusion(Riscv64Assembler * assembler)2774   explicit ScopedExtensionsInclusion(Riscv64Assembler* assembler)
2775       : ScopedExtensionsOverride(assembler, GetEnabledExtensions(assembler) | kMask) {}
2776 };
2777 
// Scope in which the extensions present in `kMask` are disabled: restricting the enabled
// extensions to `~kMask` clears exactly the bits that are set in `kMask`.
template <Riscv64ExtensionMask kMask>
using ScopedExtensionsExclusion = ScopedExtensionsRestriction<~kMask>;

// Scope in which the enabled extensions are restricted to those present in
// `kRiscv64LrScSequenceExtensionsMask`.
using ScopedLrScExtensionsRestriction =
    ScopedExtensionsRestriction<kRiscv64LrScSequenceExtensionsMask>;
2783 
2784 class ScratchRegisterScope {
2785  public:
ScratchRegisterScope(Riscv64Assembler * assembler)2786   explicit ScratchRegisterScope(Riscv64Assembler* assembler)
2787       : assembler_(assembler),
2788         old_available_scratch_core_registers_(assembler->available_scratch_core_registers_),
2789         old_available_scratch_fp_registers_(assembler->available_scratch_fp_registers_) {}
2790 
~ScratchRegisterScope()2791   ~ScratchRegisterScope() {
2792     assembler_->available_scratch_core_registers_ = old_available_scratch_core_registers_;
2793     assembler_->available_scratch_fp_registers_ = old_available_scratch_fp_registers_;
2794   }
2795 
2796   // Alocate a scratch `XRegister`. There must be an available register to allocate.
AllocateXRegister()2797   XRegister AllocateXRegister() {
2798     CHECK_NE(assembler_->available_scratch_core_registers_, 0u);
2799     // Allocate the highest available scratch register (prefer TMP(T6) over TMP2(T5)).
2800     uint32_t reg_num = (BitSizeOf(assembler_->available_scratch_core_registers_) - 1u) -
2801                        CLZ(assembler_->available_scratch_core_registers_);
2802     assembler_->available_scratch_core_registers_ &= ~(1u << reg_num);
2803     DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
2804     return enum_cast<XRegister>(reg_num);
2805   }
2806 
2807   // Free a previously unavailable core register for use as a scratch register.
2808   // This can be an arbitrary register, not necessarly the usual `TMP` or `TMP2`.
FreeXRegister(XRegister reg)2809   void FreeXRegister(XRegister reg) {
2810     uint32_t reg_num = enum_cast<uint32_t>(reg);
2811     DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
2812     CHECK_EQ((1u << reg_num) & assembler_->available_scratch_core_registers_, 0u);
2813     assembler_->available_scratch_core_registers_ |= 1u << reg_num;
2814   }
2815 
2816   // The number of available scratch core registers.
AvailableXRegisters()2817   size_t AvailableXRegisters() {
2818     return POPCOUNT(assembler_->available_scratch_core_registers_);
2819   }
2820 
2821   // Make sure a core register is available for use as a scratch register.
IncludeXRegister(XRegister reg)2822   void IncludeXRegister(XRegister reg) {
2823     uint32_t reg_num = enum_cast<uint32_t>(reg);
2824     DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
2825     assembler_->available_scratch_core_registers_ |= 1u << reg_num;
2826   }
2827 
2828   // Make sure a core register is not available for use as a scratch register.
ExcludeXRegister(XRegister reg)2829   void ExcludeXRegister(XRegister reg) {
2830     uint32_t reg_num = enum_cast<uint32_t>(reg);
2831     DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
2832     assembler_->available_scratch_core_registers_ &= ~(1u << reg_num);
2833   }
2834 
2835   // Alocate a scratch `FRegister`. There must be an available register to allocate.
AllocateFRegister()2836   FRegister AllocateFRegister() {
2837     CHECK_NE(assembler_->available_scratch_fp_registers_, 0u);
2838     // Allocate the highest available scratch register (same as for core registers).
2839     uint32_t reg_num = (BitSizeOf(assembler_->available_scratch_fp_registers_) - 1u) -
2840                        CLZ(assembler_->available_scratch_fp_registers_);
2841     assembler_->available_scratch_fp_registers_ &= ~(1u << reg_num);
2842     DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
2843     return enum_cast<FRegister>(reg_num);
2844   }
2845 
2846   // Free a previously unavailable FP register for use as a scratch register.
2847   // This can be an arbitrary register, not necessarly the usual `FTMP`.
FreeFRegister(FRegister reg)2848   void FreeFRegister(FRegister reg) {
2849     uint32_t reg_num = enum_cast<uint32_t>(reg);
2850     DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
2851     CHECK_EQ((1u << reg_num) & assembler_->available_scratch_fp_registers_, 0u);
2852     assembler_->available_scratch_fp_registers_ |= 1u << reg_num;
2853   }
2854 
2855   // The number of available scratch FP registers.
AvailableFRegisters()2856   size_t AvailableFRegisters() {
2857     return POPCOUNT(assembler_->available_scratch_fp_registers_);
2858   }
2859 
2860   // Make sure an FP register is available for use as a scratch register.
IncludeFRegister(FRegister reg)2861   void IncludeFRegister(FRegister reg) {
2862     uint32_t reg_num = enum_cast<uint32_t>(reg);
2863     DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
2864     assembler_->available_scratch_fp_registers_ |= 1u << reg_num;
2865   }
2866 
2867   // Make sure an FP register is not available for use as a scratch register.
ExcludeFRegister(FRegister reg)2868   void ExcludeFRegister(FRegister reg) {
2869     uint32_t reg_num = enum_cast<uint32_t>(reg);
2870     DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
2871     assembler_->available_scratch_fp_registers_ &= ~(1u << reg_num);
2872   }
2873 
2874  private:
2875   Riscv64Assembler* const assembler_;
2876   const uint32_t old_available_scratch_core_registers_;
2877   const uint32_t old_available_scratch_fp_registers_;
2878 
2879   DISALLOW_COPY_AND_ASSIGN(ScratchRegisterScope);
2880 };
2881 
// Mask with the bits of the "Zca", "Zcd" and "Zcb" extensions, i.e. the extensions
// treated here as the compressed ("C") instructions.
constexpr Riscv64ExtensionMask kRiscv64CompressedExtensionsMask =
    Riscv64ExtensionBit(Riscv64Extension::kZca) |
    Riscv64ExtensionBit(Riscv64Extension::kZcd) |
    Riscv64ExtensionBit(Riscv64Extension::kZcb);

// Scope in which all extensions in `kRiscv64CompressedExtensionsMask` are disabled.
using ScopedNoCInstructions = ScopedExtensionsExclusion<kRiscv64CompressedExtensionsMask>;
// Scope in which all extensions in `kRiscv64CompressedExtensionsMask` are enabled.
using ScopedUseCInstructions = ScopedExtensionsInclusion<kRiscv64CompressedExtensionsMask>;
2889 
2890 }  // namespace riscv64
2891 }  // namespace art
2892 
2893 #endif  // ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_
2894