• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_H_
18 #define BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_H_
19 
20 #include <cstdint>
21 #include <tuple>
22 #include <variant>
23 
24 #include "berberis/assembler/common.h"
25 #include "berberis/assembler/x86_64.h"
26 #include "berberis/base/checks.h"
27 #include "berberis/base/dependent_false.h"
28 #include "berberis/base/macros.h"
29 #include "berberis/decoder/riscv64/decoder.h"
30 #include "berberis/decoder/riscv64/semantics_player.h"
31 #include "berberis/guest_state/guest_addr.h"
32 #include "berberis/guest_state/guest_state.h"
33 #include "berberis/intrinsics/intrinsics.h"
34 #include "berberis/intrinsics/intrinsics_float.h"
35 #include "berberis/intrinsics/macro_assembler.h"
36 #include "berberis/lite_translator/lite_translate_region.h"
37 #include "berberis/runtime_primitives/platform.h"
38 
39 #include "allocator.h"
40 #include "call_intrinsic.h"
41 #include "inline_intrinsic.h"
42 #include "register_maintainer.h"
43 
44 namespace berberis {
45 
// Forward declaration; the constructor below takes a MachineCode* (fixed typo: was "MachindeCode").
class MachineCode;
47 
48 class LiteTranslator {
49  public:
50   using Assembler = MacroAssembler<x86_64::Assembler>;
51   using CsrName = berberis::CsrName;
52   using Decoder = Decoder<SemanticsPlayer<LiteTranslator>>;
53   using Register = Assembler::Register;
54   // Note: on RISC-V architecture FP register and SIMD registers are disjoint, but on x86 they are
55   // the same.
56   using FpRegister = Assembler::XMMRegister;
57   using SimdRegister = Assembler::XMMRegister;
58   using Condition = Assembler::Condition;
59   using Float32 = intrinsics::Float32;
60   using Float64 = intrinsics::Float64;
61 
62   explicit LiteTranslator(MachineCode* machine_code,
63                           GuestAddr pc,
64                           LiteTranslateParams params = LiteTranslateParams{})
as_(machine_code)65       : as_(machine_code),
66         success_(true),
67         pc_(pc),
68         params_(params),
69         is_region_end_reached_(false){};
70 
71   //
72   // Instruction implementations.
73   //
74 
75   Register Op(Decoder::OpOpcode opcode, Register arg1, Register arg2);
76   Register Op32(Decoder::Op32Opcode opcode, Register arg1, Register arg2);
77   Register OpImm(Decoder::OpImmOpcode opcode, Register arg, int16_t imm);
78   Register OpImm32(Decoder::OpImm32Opcode opcode, Register arg, int16_t imm);
79   Register Slli(Register arg, int8_t imm);
80   Register Srli(Register arg, int8_t imm);
81   Register Srai(Register arg, int8_t imm);
82   Register ShiftImm32(Decoder::ShiftImm32Opcode opcode, Register arg, uint16_t imm);
83   Register Rori(Register arg, int8_t shamt);
84   Register Roriw(Register arg, int8_t shamt);
85   Register Lui(int32_t imm);
86   Register Auipc(int32_t imm);
87   void CompareAndBranch(Decoder::BranchOpcode opcode, Register arg1, Register arg2, int16_t offset);
88   void Branch(int32_t offset);
89   void BranchRegister(Register base, int16_t offset);
90   void ExitGeneratedCode(GuestAddr target);
91   void ExitRegion(GuestAddr target);
92   void ExitRegionIndirect(Register target);
93   void Store(Decoder::MemoryDataOperandType operand_type,
94              Register arg,
95              int16_t offset,
96              Register data);
97   Register Load(Decoder::LoadOperandType operand_type, Register arg, int16_t offset);
98 
Ecall(Register syscall_nr,Register arg0,Register arg1,Register arg2,Register arg3,Register arg4,Register arg5)99   Register Ecall(Register syscall_nr,
100                  Register arg0,
101                  Register arg1,
102                  Register arg2,
103                  Register arg3,
104                  Register arg4,
105                  Register arg5) {
106     UNUSED(syscall_nr, arg0, arg1, arg2, arg3, arg4, arg5);
107     Undefined();
108     return {};
109   }
110 
Fence(Decoder::FenceOpcode,Register,bool sw,bool sr,bool,bool,bool pw,bool pr,bool,bool)111   void Fence(Decoder::FenceOpcode /*opcode*/,
112              Register /*src*/,
113              bool sw,
114              bool sr,
115              bool /*so*/,
116              bool /*si*/,
117              bool pw,
118              bool pr,
119              bool /*po*/,
120              bool /*pi*/) {
121     UNUSED(sw, sr, pw, pr);
122     Undefined();
123   }
124 
Nop()125   void Nop() {}
126 
127   //
128   // Csr
129   //
130 
131   Register UpdateCsr(Decoder::CsrOpcode opcode, Register arg, Register csr);
132   Register UpdateCsr(Decoder::CsrImmOpcode opcode, uint8_t imm, Register csr);
133 
134   //
135   // F and D extensions.
136   //
137 
138   template <typename DataType>
LoadFp(Register arg,int16_t offset)139   FpRegister LoadFp(Register arg, int16_t offset) {
140     FpRegister res = AllocTempSimdReg();
141     as_.Movs<DataType>(res, {.base = arg, .disp = offset});
142     return res;
143   }
144 
145   template <typename DataType>
StoreFp(Register arg,int16_t offset,FpRegister data)146   void StoreFp(Register arg, int16_t offset, FpRegister data) {
147     as_.Movs<DataType>({.base = arg, .disp = offset}, data);
148   }
149 
Fmv(FpRegister arg)150   FpRegister Fmv(FpRegister arg) {
151     SimdRegister res = AllocTempSimdReg();
152     if (host_platform::kHasAVX) {
153       as_.Vmovapd(res, arg);
154     } else {
155       as_.Vmovaps(res, arg);
156     }
157     return res;
158   }
159 
160   //
161   // V extension.
162   //
163 
164   template <typename VOpArgs, typename... ExtraAegs>
OpVector(const VOpArgs &,ExtraAegs...)165   void OpVector(const VOpArgs& /*args*/, ExtraAegs... /*extra_args*/) {
166     // TODO(300690740): develop and implement strategy which would allow us to support vector
167     // intrinsics not just in the interpreter.
168     Undefined();
169   }
170 
171   //
172   // Guest state getters/setters.
173   //
174 
GetInsnAddr()175   GuestAddr GetInsnAddr() const { return pc_; }
176 
GetReg(uint8_t reg)177   Register GetReg(uint8_t reg) {
178     CHECK_GT(reg, 0);
179     CHECK_LT(reg, std::size(ThreadState{}.cpu.x));
180     if (IsRegMappingEnabled()) {
181       auto [mapped_reg, is_new_mapping] = GetMappedRegisterOrMap(reg);
182       if (is_new_mapping) {
183         int32_t offset = offsetof(ThreadState, cpu.x[0]) + reg * 8;
184         as_.Movq(mapped_reg, {.base = as_.rbp, .disp = offset});
185       }
186       return mapped_reg;
187     }
188     Register result = AllocTempReg();
189     int32_t offset = offsetof(ThreadState, cpu.x[0]) + reg * 8;
190     as_.Movq(result, {.base = as_.rbp, .disp = offset});
191     return result;
192   }
193 
SetReg(uint8_t reg,Register value)194   void SetReg(uint8_t reg, Register value) {
195     CHECK_GT(reg, 0);
196     CHECK_LT(reg, std::size(ThreadState{}.cpu.x));
197     CHECK_LE(reg, kNumGuestRegs);
198     if (IsRegMappingEnabled()) {
199       auto [mapped_reg, _] = GetMappedRegisterOrMap(reg);
200       if (success()) {
201         as_.Movq(mapped_reg, value);
202         gp_maintainer_.NoticeModified(reg);
203       }
204       return;
205     }
206     int32_t offset = offsetof(ThreadState, cpu.x[0]) + reg * 8;
207     as_.Movq({.base = as_.rbp, .disp = offset}, value);
208   }
209 
StoreMappedRegs()210   void StoreMappedRegs() {
211     if (!IsRegMappingEnabled()) {
212       return;
213     }
214     for (int i = 0; i < int(kNumGuestRegs); i++) {
215       if (gp_maintainer_.IsModified(i)) {
216         auto mapped_reg = gp_maintainer_.GetMapped(i);
217         int32_t offset = offsetof(ThreadState, cpu.x[0]) + i * 8;
218         as_.Movq({.base = as_.rbp, .disp = offset}, mapped_reg);
219       }
220     }
221     for (int i = 0; i < int(kNumGuestFpRegs); i++) {
222       if (simd_maintainer_.IsModified(i)) {
223         auto mapped_reg = simd_maintainer_.GetMapped(i);
224         int32_t offset = offsetof(ThreadState, cpu.f) + i * sizeof(Float64);
225         StoreFpReg(mapped_reg, offset);
226       }
227     }
228   }
229 
GetFpReg(uint8_t reg)230   FpRegister GetFpReg(uint8_t reg) {
231     CHECK_LT(reg, std::size(ThreadState{}.cpu.f));
232     CHECK_LE(reg, kNumGuestFpRegs);
233     if (IsRegMappingEnabled()) {
234       auto [mapped_reg, is_new_mapping] = GetMappedFpRegOrMap(reg);
235       if (is_new_mapping) {
236         int32_t offset = offsetof(ThreadState, cpu.f) + reg * sizeof(Float64);
237         as_.Movsd(mapped_reg, {.base = Assembler::rbp, .disp = offset});
238       }
239       return mapped_reg;
240     }
241     SimdRegister result = AllocTempSimdReg();
242     int32_t offset = offsetof(ThreadState, cpu.f) + reg * sizeof(Float64);
243     as_.Movsd(result, {.base = Assembler::rbp, .disp = offset});
244     return result;
245   }
246 
247   template <typename FloatType>
GetFRegAndUnboxNan(uint8_t reg)248   FpRegister GetFRegAndUnboxNan(uint8_t reg) {
249     SimdRegister result = GetFpReg(reg);
250     SimdRegister unboxed_result = AllocTempSimdReg();
251     if (host_platform::kHasAVX) {
252       as_.MacroUnboxNanAVX<FloatType>(unboxed_result, result);
253     } else {
254       as_.MacroUnboxNan<FloatType>(unboxed_result, result);
255     }
256     return unboxed_result;
257   }
258 
259   template <typename FloatType>
NanBoxFpReg(FpRegister value)260   void NanBoxFpReg(FpRegister value) {
261     if (host_platform::kHasAVX) {
262       as_.MacroNanBoxAVX<FloatType>(value, value);
263       return;
264     }
265     as_.MacroNanBox<FloatType>(value);
266   }
267 
268   template <typename FloatType>
NanBoxAndSetFpReg(uint8_t reg,FpRegister value)269   void NanBoxAndSetFpReg(uint8_t reg, FpRegister value) {
270     CHECK_LT(reg, std::size(ThreadState{}.cpu.f));
271     int32_t offset = offsetof(ThreadState, cpu.f) + reg * sizeof(Float64);
272     NanBoxFpReg<FloatType>(value);
273 
274     if (IsRegMappingEnabled()) {
275       auto [mapped_reg, _] = GetMappedFpRegOrMap(reg);
276       if (success()) {
277         // Operand type doesn't matter.
278         MoveFpReg(mapped_reg, value);
279         simd_maintainer_.NoticeModified(reg);
280       }
281       return;
282     }
283 
284     StoreFpReg(value, offset);
285   }
286 
287   //
288   // Various helper methods.
289   //
290 
291   template <CsrName kName>
GetCsr()292   [[nodiscard]] Register GetCsr() {
293     Register csr_reg = AllocTempReg();
294     as_.Expand<uint64_t, CsrFieldType<kName>>(
295         csr_reg, {.base = Assembler::rbp, .disp = kCsrFieldOffset<kName>});
296     return csr_reg;
297   }
298 
299   template <CsrName kName>
SetCsr(uint8_t imm)300   void SetCsr(uint8_t imm) {
301     // Note: csr immediate only have 5 bits in RISC-V encoding which guarantess us that
302     // “imm & kCsrMask<kName>”can be used as 8-bit immediate.
303     as_.Mov<CsrFieldType<kName>>({.base = Assembler::rbp, .disp = kCsrFieldOffset<kName>},
304                                  static_cast<int8_t>(imm & kCsrMask<kName>));
305   }
306 
307   template <CsrName kName>
SetCsr(Register arg)308   void SetCsr(Register arg) {
309     // Use RCX as temporary register.
310     as_.Mov<CsrFieldType<kName>>(Assembler::rcx, arg);
311     if constexpr (sizeof(CsrFieldType<kName>) <= sizeof(int32_t)) {
312       as_.And<CsrFieldType<kName>>(Assembler::rcx, kCsrMask<kName>);
313     } else {
314       as_.And<CsrFieldType<kName>>(Assembler::rcx,
315                                    {.disp = constants_pool::kConst<uint64_t{kCsrMask<kName>}>});
316     }
317     as_.Mov<CsrFieldType<kName>>({.base = Assembler::rbp, .disp = kCsrFieldOffset<kName>},
318                                  Assembler::rcx);
319   }
320 
GetImm(uint64_t imm)321   [[nodiscard]] Register GetImm(uint64_t imm) {
322     Register imm_reg = AllocTempReg();
323     as_.Movq(imm_reg, imm);
324     return imm_reg;
325   }
326 
Copy(Register value)327   [[nodiscard]] Register Copy(Register value) {
328     Register result = AllocTempReg();
329     as_.Movq(result, value);
330     return result;
331   }
332 
Undefined()333   void Undefined() { success_ = false; }
334 
gp_maintainer()335   RegisterFileMaintainer<Register, kNumGuestRegs>* gp_maintainer() { return &gp_maintainer_; }
simd_maintainer()336   RegisterFileMaintainer<SimdRegister, kNumGuestFpRegs>* simd_maintainer() {
337     return &simd_maintainer_;
338   }
as()339   [[nodiscard]] Assembler* as() { return &as_; }
success()340   [[nodiscard]] bool success() const { return success_; }
341 
FreeTempRegs()342   void FreeTempRegs() {
343     gp_allocator_.FreeTemps();
344     simd_allocator_.FreeTemps();
345   }
346 
StoreFpReg(FpRegister value,int32_t offset)347   void StoreFpReg(FpRegister value, int32_t offset) {
348     if (host_platform::kHasAVX) {
349       as_.Vmovsd({.base = Assembler::rbp, .disp = offset}, value);
350     } else {
351       as_.Movsd({.base = Assembler::rbp, .disp = offset}, value);
352     }
353   }
354 
MoveFpReg(FpRegister reg,FpRegister value)355   void MoveFpReg(FpRegister reg, FpRegister value) {
356     if (host_platform::kHasAVX) {
357       as_.Vmovsd(reg, value, value);
358     } else {
359       as_.Movsd(reg, value);
360     }
361   }
362 
363 #include "berberis/intrinsics/translator_intrinsics_hooks-inl.h"
364 
is_region_end_reached()365   bool is_region_end_reached() const { return is_region_end_reached_; }
366 
IncrementInsnAddr(uint8_t insn_size)367   void IncrementInsnAddr(uint8_t insn_size) { pc_ += insn_size; }
368 
IsRegMappingEnabled()369   bool IsRegMappingEnabled() { return params_.enable_reg_mapping; }
370 
GetMappedRegisterOrMap(int reg)371   std::tuple<Register, bool> GetMappedRegisterOrMap(int reg) {
372     if (gp_maintainer_.IsMapped(reg)) {
373       return {gp_maintainer_.GetMapped(reg), false};
374     }
375 
376     if (auto alloc_result = gp_allocator_.Alloc()) {
377       gp_maintainer_.Map(reg, alloc_result.value());
378       return {alloc_result.value(), true};
379     }
380     success_ = false;
381     return {{}, false};
382   }
383 
GetMappedFpRegOrMap(int reg)384   std::tuple<SimdRegister, bool> GetMappedFpRegOrMap(int reg) {
385     if (simd_maintainer_.IsMapped(reg)) {
386       return {simd_maintainer_.GetMapped(reg), false};
387     }
388 
389     if (auto alloc_result = simd_allocator_.Alloc()) {
390       simd_maintainer_.Map(reg, alloc_result.value());
391       return {alloc_result.value(), true};
392     }
393     success_ = false;
394     return {{}, false};
395   }
396 
AllocTempReg()397   Register AllocTempReg() {
398     if (auto reg_option = gp_allocator_.AllocTemp()) {
399       return reg_option.value();
400     }
401     success_ = false;
402     return {};
403   };
404 
AllocTempSimdReg()405   SimdRegister AllocTempSimdReg() {
406     if (auto reg_option = simd_allocator_.AllocTemp()) {
407       return reg_option.value();
408     }
409     success_ = false;
410     return {};
411   };
412 
413   template <typename IntType, bool aq, bool rl>
Lr(Register)414   Register Lr(Register /* addr */) {
415     Undefined();
416     return {};
417   }
418 
419   template <typename IntType, bool aq, bool rl>
Sc(Register,Register)420   Register Sc(Register /* addr */, Register /* data */) {
421     Undefined();
422     return {};
423   }
424 
425  private:
426   template <auto kFunction, typename AssemblerResType, typename... AssemblerArgType>
CallIntrinsic(AssemblerArgType...args)427   AssemblerResType CallIntrinsic(AssemblerArgType... args) {
428     if constexpr (std::is_same_v<AssemblerResType, void>) {
429       if (inline_intrinsic::TryInlineIntrinsic<kFunction>(
430               as_,
431               [this]() { return AllocTempReg(); },
432               [this]() { return AllocTempSimdReg(); },
433               std::monostate{},
434               args...)) {
435         return;
436       }
437       call_intrinsic::CallIntrinsic<AssemblerResType>(as_, kFunction, args...);
438     } else {
439       AssemblerResType result;
440       if constexpr (std::is_same_v<AssemblerResType, Register>) {
441         result = AllocTempReg();
442       } else if constexpr (std::is_same_v<AssemblerResType, std::tuple<Register, Register>>) {
443         result = std::tuple{AllocTempReg(), AllocTempReg()};
444       } else if constexpr (std::is_same_v<AssemblerResType, SimdRegister>) {
445         result = AllocTempSimdReg();
446       } else {
447         // This should not be reached by the compiler. If it is - there is a new result type that
448         // needs to be supported.
449         static_assert(kDependentTypeFalse<AssemblerResType>, "Unsupported result type");
450       }
451 
452       if (inline_intrinsic::TryInlineIntrinsic<kFunction>(
453               as_,
454               [this]() { return AllocTempReg(); },
455               [this]() { return AllocTempSimdReg(); },
456               result,
457               args...)) {
458         return result;
459       }
460 
461       call_intrinsic::CallIntrinsic<AssemblerResType>(as_, kFunction, result, args...);
462 
463       return result;
464     }
465   }
466 
467   Assembler as_;
468   bool success_;
469   GuestAddr pc_;
470   Allocator<Register> gp_allocator_;
471   RegisterFileMaintainer<Register, kNumGuestRegs> gp_maintainer_;
472   RegisterFileMaintainer<SimdRegister, kNumGuestFpRegs> simd_maintainer_;
473   Allocator<SimdRegister> simd_allocator_;
474   const LiteTranslateParams params_;
475   bool is_region_end_reached_;
476 };
477 
// The cycle CSR is emulated via the CPU clock-count intrinsic rather than a ThreadState field.
template <>
[[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kCycle>() {
  return CPUClockCount();
}
482 
483 template <>
484 [[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kFCsr>() {
485   Register csr_reg = AllocTempReg();
486   bool inline_succeful = inline_intrinsic::TryInlineIntrinsic<&intrinsics::FeGetExceptions>(
487       as_,
488       [this]() { return AllocTempReg(); },
489       [this]() { return AllocTempSimdReg(); },
490       Assembler::rax);
491   CHECK(inline_succeful);
492   as_.Expand<uint64_t, CsrFieldType<CsrName::kFrm>>(
493       csr_reg, {.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kFrm>});
494   as_.Shl<uint8_t>(csr_reg, 5);
495   as_.Or<uint8_t>(csr_reg, as_.rax);
496   return csr_reg;
497 }
498 
// fflags is read straight from the host FP exception state via intrinsic.
template <>
[[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kFFlags>() {
  return FeGetExceptions();
}
503 
// vlenb (vector register length in bytes) is the constant 16, i.e. a 128-bit VLEN.
template <>
[[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kVlenb>() {
  return GetImm(16);
}
508 
// vxrm lives in the two low bits of the combined vcsr guest-state field.
template <>
[[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kVxrm>() {
  Register reg = AllocTempReg();
  // Load the whole vcsr byte widened to 64 bits…
  as_.Expand<uint64_t, uint8_t>(reg,
                                {.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>});
  // …and keep only the rounding-mode bits.
  as_.And<uint8_t>(reg, 0b11);
  return reg;
}
517 
// vxsat is bit 2 of the combined vcsr guest-state field; shift it down to bit 0.
template <>
[[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kVxsat>() {
  Register reg = AllocTempReg();
  as_.Expand<uint64_t, uint8_t>(reg,
                                {.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>});
  as_.Shr<uint8_t>(reg, 2);
  return reg;
}
526 
527 template <>
528 inline void LiteTranslator::SetCsr<CsrName::kFCsr>(uint8_t imm) {
529   // Note: instructions Csrrci or Csrrsi couldn't affect Frm because immediate only has five bits.
530   // But these instruction don't pass their immediate-specified argument into `SetCsr`, they combine
531   // it with register first. Fixing that can only be done by changing code in the semantics player.
532   //
533   // But Csrrwi may clear it.  And we actually may only arrive here from Csrrwi.
534   // Thus, technically, we know that imm >> 5 is always zero, but it doesn't look like a good idea
535   // to rely on that: it's very subtle and it only affects code generation speed.
536   as_.Mov<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kFrm>},
537                    static_cast<int8_t>(imm >> 5));
538   as_.MacroFeSetExceptionsAndRoundImmTranslate(
539       {Assembler::rbp, .disp = static_cast<int>(offsetof(ThreadState, intrinsics_scratch_area))},
540       imm);
541 }
542 
543 template <>
544 inline void LiteTranslator::SetCsr<CsrName::kFCsr>(Register arg) {
545   // Use RAX as temporary register for exceptions and RCX for rm.
546   // We know RCX would be used by FeSetRound, too.
547   as_.Mov<uint8_t>(Assembler::rax, arg);
548   as_.And<uint32_t>(Assembler::rax, 0b1'1111);
549   as_.Shldl(Assembler::rcx, arg, int8_t{32 - 5});
550   as_.And<uint8_t>(Assembler::rcx, kCsrMask<CsrName::kFrm>);
551   as_.Mov<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kFrm>},
552                    Assembler::rcx);
553   as_.MacroFeSetExceptionsAndRoundTranslate(
554       Assembler::rax,
555       {Assembler::rbp, .disp = static_cast<int>(offsetof(ThreadState, intrinsics_scratch_area))},
556       Assembler::rax);
557 }
558 
// fflags only has five defined bits; forward the masked immediate to the host FP state.
template <>
inline void LiteTranslator::SetCsr<CsrName::kFFlags>(uint8_t imm) {
  FeSetExceptionsImm(static_cast<int8_t>(imm & 0b1'1111));
}
563 
// Register form of the above: mask to the five defined fflags bits, then apply.
template <>
inline void LiteTranslator::SetCsr<CsrName::kFFlags>(Register arg) {
  // Use RAX as temporary register.
  as_.Mov<uint8_t>(Assembler::rax, arg);
  as_.And<uint32_t>(Assembler::rax, 0b1'1111);
  FeSetExceptions(Assembler::rax);
}
571 
// frm must be stored both in guest state (so GetCsr sees it) and in the host FP
// environment (so subsequent FP instructions round correctly).
template <>
inline void LiteTranslator::SetCsr<CsrName::kFrm>(uint8_t imm) {
  as_.Mov<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kFrm>},
                   static_cast<int8_t>(imm & kCsrMask<CsrName::kFrm>));
  FeSetRoundImm(static_cast<int8_t>(imm & kCsrMask<CsrName::kFrm>));
}
578 
// Register form: mask the rounding mode into RCX, store it to guest state, then apply
// it to the host FP environment.
template <>
inline void LiteTranslator::SetCsr<CsrName::kFrm>(Register arg) {
  // Use RCX as temporary register. We know it would be used by FeSetRound, too.
  as_.Mov<uint8_t>(Assembler::rcx, arg);
  as_.And<uint8_t>(Assembler::rcx, kCsrMask<CsrName::kFrm>);
  as_.Mov<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kFrm>},
                   Assembler::rcx);
  FeSetRound(Assembler::rcx);
}
588 
// Updates the vxrm bits (low two bits) of vcsr while preserving vxsat (bit 2).
// Both memory ops are conditionally skipped when the immediate makes them redundant.
template <>
inline void LiteTranslator::SetCsr<CsrName::kVxrm>(uint8_t imm) {
  imm &= 0b11;
  if (imm != 0b11) {
    // And with 0b100 clears the old vxrm bits, keeping vxsat.  When imm == 0b11 the Or
    // below sets both bits regardless, so the clear can be skipped.
    as_.And<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, 0b100);
  }
  if (imm != 0b00) {
    // When imm == 0b00 the clear above already produced the right value.
    as_.Or<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, imm);
  }
}
599 
// Register form: clear the old vxrm bits in vcsr (keeping vxsat in bit 2), then merge
// the two low bits of `arg`.  Note: clobbers `arg` (masks it in place).
template <>
inline void LiteTranslator::SetCsr<CsrName::kVxrm>(Register arg) {
  as_.And<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, 0b100);
  as_.And<uint8_t>(arg, 0b11);
  as_.Or<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, arg);
}
606 
// Sets or clears the vxsat flag (bit 2 of vcsr) while preserving the vxrm bits.
template <>
inline void LiteTranslator::SetCsr<CsrName::kVxsat>(uint8_t imm) {
  if (imm & 0b1) {
    as_.Or<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, 0b100);
  } else {
    // And with 0b11 clears bit 2 and keeps the rounding-mode bits.
    as_.And<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, 0b11);
  }
}
615 
// Register form: clear the old vxsat (bit 2 of vcsr), then set it again iff bit 0 of
// `arg` is non-zero, using setcc + shift to build the 0b100 value branchlessly.
template <>
inline void LiteTranslator::SetCsr<CsrName::kVxsat>(Register arg) {
  as_.And<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, 0b11);
  as_.Test<uint8_t>(arg, 1);
  // Use RCX as temporary register.
  as_.Setcc(Condition::kNotZero, as_.rcx);  // rcx = (arg & 1) ? 1 : 0
  as_.Shl<uint8_t>(as_.rcx, int8_t{2});     // move the flag into bit 2
  as_.Or<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, as_.rcx);
}
625 
626 // There is no NanBoxing for Float64 except on CPUs with Float128 support.
627 template <>
628 inline LiteTranslator::FpRegister LiteTranslator::GetFRegAndUnboxNan<LiteTranslator::Float64>(
629     uint8_t reg) {
630   SimdRegister result = GetFpReg(reg);
631   return result;
632 }
633 
// Float64 values are not NaN-boxed, so boxing is a no-op.
template <>
inline void LiteTranslator::NanBoxFpReg<LiteTranslator::Float64>(FpRegister) {}
636 
637 }  // namespace berberis
638 
639 #endif  // BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_H_
640