1 /* 2 * Copyright (C) 2023 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_H_ 18 #define BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_H_ 19 20 #include <cstdint> 21 #include <tuple> 22 #include <variant> 23 24 #include "berberis/assembler/common.h" 25 #include "berberis/assembler/x86_64.h" 26 #include "berberis/base/checks.h" 27 #include "berberis/base/dependent_false.h" 28 #include "berberis/base/macros.h" 29 #include "berberis/decoder/riscv64/decoder.h" 30 #include "berberis/decoder/riscv64/semantics_player.h" 31 #include "berberis/guest_state/guest_addr.h" 32 #include "berberis/guest_state/guest_state.h" 33 #include "berberis/intrinsics/intrinsics.h" 34 #include "berberis/intrinsics/intrinsics_float.h" 35 #include "berberis/intrinsics/macro_assembler.h" 36 #include "berberis/lite_translator/lite_translate_region.h" 37 #include "berberis/runtime_primitives/platform.h" 38 39 #include "allocator.h" 40 #include "call_intrinsic.h" 41 #include "inline_intrinsic.h" 42 #include "register_maintainer.h" 43 44 namespace berberis { 45 46 class MachindeCode; 47 48 class LiteTranslator { 49 public: 50 using Assembler = MacroAssembler<x86_64::Assembler>; 51 using CsrName = berberis::CsrName; 52 using Decoder = Decoder<SemanticsPlayer<LiteTranslator>>; 53 using Register = Assembler::Register; 54 // Note: on RISC-V architecture FP register and SIMD registers are disjoint, but on x86 they are 55 // the same. 56 using FpRegister = Assembler::XMMRegister; 57 using SimdRegister = Assembler::XMMRegister; 58 using Condition = Assembler::Condition; 59 using Float32 = intrinsics::Float32; 60 using Float64 = intrinsics::Float64; 61 62 explicit LiteTranslator(MachineCode* machine_code, 63 GuestAddr pc, 64 LiteTranslateParams params = LiteTranslateParams{}) as_(machine_code)65 : as_(machine_code), 66 success_(true), 67 pc_(pc), 68 params_(params), 69 is_region_end_reached_(false){}; 70 71 // 72 // Instruction implementations. 73 // 74 75 Register Op(Decoder::OpOpcode opcode, Register arg1, Register arg2); 76 Register Op32(Decoder::Op32Opcode opcode, Register arg1, Register arg2); 77 Register OpImm(Decoder::OpImmOpcode opcode, Register arg, int16_t imm); 78 Register OpImm32(Decoder::OpImm32Opcode opcode, Register arg, int16_t imm); 79 Register Slli(Register arg, int8_t imm); 80 Register Srli(Register arg, int8_t imm); 81 Register Srai(Register arg, int8_t imm); 82 Register ShiftImm32(Decoder::ShiftImm32Opcode opcode, Register arg, uint16_t imm); 83 Register Rori(Register arg, int8_t shamt); 84 Register Roriw(Register arg, int8_t shamt); 85 Register Lui(int32_t imm); 86 Register Auipc(int32_t imm); 87 void CompareAndBranch(Decoder::BranchOpcode opcode, Register arg1, Register arg2, int16_t offset); 88 void Branch(int32_t offset); 89 void BranchRegister(Register base, int16_t offset); 90 void ExitGeneratedCode(GuestAddr target); 91 void ExitRegion(GuestAddr target); 92 void ExitRegionIndirect(Register target); 93 void Store(Decoder::MemoryDataOperandType operand_type, 94 Register arg, 95 int16_t offset, 96 Register data); 97 Register Load(Decoder::LoadOperandType operand_type, Register arg, int16_t offset); 98 Ecall(Register syscall_nr,Register arg0,Register arg1,Register arg2,Register arg3,Register arg4,Register arg5)99 Register Ecall(Register syscall_nr, 100 Register arg0, 101 Register arg1, 102 Register arg2, 103 Register arg3, 104 Register arg4, 105 Register arg5) { 106 UNUSED(syscall_nr, arg0, arg1, arg2, arg3, arg4, arg5); 107 Undefined(); 108 return {}; 109 } 110 Fence(Decoder::FenceOpcode,Register,bool sw,bool sr,bool,bool,bool pw,bool pr,bool,bool)111 void Fence(Decoder::FenceOpcode /*opcode*/, 112 Register /*src*/, 113 bool sw, 114 bool sr, 115 bool /*so*/, 116 bool /*si*/, 117 bool pw, 118 bool pr, 119 bool /*po*/, 120 bool /*pi*/) { 121 UNUSED(sw, sr, pw, pr); 122 Undefined(); 123 } 124 Nop()125 void Nop() {} 126 127 // 128 // Csr 129 // 130 131 Register UpdateCsr(Decoder::CsrOpcode opcode, Register arg, Register csr); 132 Register UpdateCsr(Decoder::CsrImmOpcode opcode, uint8_t imm, Register csr); 133 134 // 135 // F and D extensions. 136 // 137 138 template <typename DataType> LoadFp(Register arg,int16_t offset)139 FpRegister LoadFp(Register arg, int16_t offset) { 140 FpRegister res = AllocTempSimdReg(); 141 as_.Movs<DataType>(res, {.base = arg, .disp = offset}); 142 return res; 143 } 144 145 template <typename DataType> StoreFp(Register arg,int16_t offset,FpRegister data)146 void StoreFp(Register arg, int16_t offset, FpRegister data) { 147 as_.Movs<DataType>({.base = arg, .disp = offset}, data); 148 } 149 Fmv(FpRegister arg)150 FpRegister Fmv(FpRegister arg) { 151 SimdRegister res = AllocTempSimdReg(); 152 if (host_platform::kHasAVX) { 153 as_.Vmovapd(res, arg); 154 } else { 155 as_.Vmovaps(res, arg); 156 } 157 return res; 158 } 159 160 // 161 // V extension. 162 // 163 164 template <typename VOpArgs, typename... ExtraAegs> OpVector(const VOpArgs &,ExtraAegs...)165 void OpVector(const VOpArgs& /*args*/, ExtraAegs... /*extra_args*/) { 166 // TODO(300690740): develop and implement strategy which would allow us to support vector 167 // intrinsics not just in the interpreter. 168 Undefined(); 169 } 170 171 // 172 // Guest state getters/setters. 173 // 174 GetInsnAddr()175 GuestAddr GetInsnAddr() const { return pc_; } 176 GetReg(uint8_t reg)177 Register GetReg(uint8_t reg) { 178 CHECK_GT(reg, 0); 179 CHECK_LT(reg, std::size(ThreadState{}.cpu.x)); 180 if (IsRegMappingEnabled()) { 181 auto [mapped_reg, is_new_mapping] = GetMappedRegisterOrMap(reg); 182 if (is_new_mapping) { 183 int32_t offset = offsetof(ThreadState, cpu.x[0]) + reg * 8; 184 as_.Movq(mapped_reg, {.base = as_.rbp, .disp = offset}); 185 } 186 return mapped_reg; 187 } 188 Register result = AllocTempReg(); 189 int32_t offset = offsetof(ThreadState, cpu.x[0]) + reg * 8; 190 as_.Movq(result, {.base = as_.rbp, .disp = offset}); 191 return result; 192 } 193 SetReg(uint8_t reg,Register value)194 void SetReg(uint8_t reg, Register value) { 195 CHECK_GT(reg, 0); 196 CHECK_LT(reg, std::size(ThreadState{}.cpu.x)); 197 CHECK_LE(reg, kNumGuestRegs); 198 if (IsRegMappingEnabled()) { 199 auto [mapped_reg, _] = GetMappedRegisterOrMap(reg); 200 if (success()) { 201 as_.Movq(mapped_reg, value); 202 gp_maintainer_.NoticeModified(reg); 203 } 204 return; 205 } 206 int32_t offset = offsetof(ThreadState, cpu.x[0]) + reg * 8; 207 as_.Movq({.base = as_.rbp, .disp = offset}, value); 208 } 209 StoreMappedRegs()210 void StoreMappedRegs() { 211 if (!IsRegMappingEnabled()) { 212 return; 213 } 214 for (int i = 0; i < int(kNumGuestRegs); i++) { 215 if (gp_maintainer_.IsModified(i)) { 216 auto mapped_reg = gp_maintainer_.GetMapped(i); 217 int32_t offset = offsetof(ThreadState, cpu.x[0]) + i * 8; 218 as_.Movq({.base = as_.rbp, .disp = offset}, mapped_reg); 219 } 220 } 221 for (int i = 0; i < int(kNumGuestFpRegs); i++) { 222 if (simd_maintainer_.IsModified(i)) { 223 auto mapped_reg = simd_maintainer_.GetMapped(i); 224 int32_t offset = offsetof(ThreadState, cpu.f) + i * sizeof(Float64); 225 StoreFpReg(mapped_reg, offset); 226 } 227 } 228 } 229 GetFpReg(uint8_t reg)230 FpRegister GetFpReg(uint8_t reg) { 231 CHECK_LT(reg, std::size(ThreadState{}.cpu.f)); 232 CHECK_LE(reg, kNumGuestFpRegs); 233 if (IsRegMappingEnabled()) { 234 auto [mapped_reg, is_new_mapping] = GetMappedFpRegOrMap(reg); 235 if (is_new_mapping) { 236 int32_t offset = offsetof(ThreadState, cpu.f) + reg * sizeof(Float64); 237 as_.Movsd(mapped_reg, {.base = Assembler::rbp, .disp = offset}); 238 } 239 return mapped_reg; 240 } 241 SimdRegister result = AllocTempSimdReg(); 242 int32_t offset = offsetof(ThreadState, cpu.f) + reg * sizeof(Float64); 243 as_.Movsd(result, {.base = Assembler::rbp, .disp = offset}); 244 return result; 245 } 246 247 template <typename FloatType> GetFRegAndUnboxNan(uint8_t reg)248 FpRegister GetFRegAndUnboxNan(uint8_t reg) { 249 SimdRegister result = GetFpReg(reg); 250 SimdRegister unboxed_result = AllocTempSimdReg(); 251 if (host_platform::kHasAVX) { 252 as_.MacroUnboxNanAVX<FloatType>(unboxed_result, result); 253 } else { 254 as_.MacroUnboxNan<FloatType>(unboxed_result, result); 255 } 256 return unboxed_result; 257 } 258 259 template <typename FloatType> NanBoxFpReg(FpRegister value)260 void NanBoxFpReg(FpRegister value) { 261 if (host_platform::kHasAVX) { 262 as_.MacroNanBoxAVX<FloatType>(value, value); 263 return; 264 } 265 as_.MacroNanBox<FloatType>(value); 266 } 267 268 template <typename FloatType> NanBoxAndSetFpReg(uint8_t reg,FpRegister value)269 void NanBoxAndSetFpReg(uint8_t reg, FpRegister value) { 270 CHECK_LT(reg, std::size(ThreadState{}.cpu.f)); 271 int32_t offset = offsetof(ThreadState, cpu.f) + reg * sizeof(Float64); 272 NanBoxFpReg<FloatType>(value); 273 274 if (IsRegMappingEnabled()) { 275 auto [mapped_reg, _] = GetMappedFpRegOrMap(reg); 276 if (success()) { 277 // Operand type doesn't matter. 278 MoveFpReg(mapped_reg, value); 279 simd_maintainer_.NoticeModified(reg); 280 } 281 return; 282 } 283 284 StoreFpReg(value, offset); 285 } 286 287 // 288 // Various helper methods. 289 // 290 291 template <CsrName kName> GetCsr()292 [[nodiscard]] Register GetCsr() { 293 Register csr_reg = AllocTempReg(); 294 as_.Expand<uint64_t, CsrFieldType<kName>>( 295 csr_reg, {.base = Assembler::rbp, .disp = kCsrFieldOffset<kName>}); 296 return csr_reg; 297 } 298 299 template <CsrName kName> SetCsr(uint8_t imm)300 void SetCsr(uint8_t imm) { 301 // Note: csr immediate only have 5 bits in RISC-V encoding which guarantess us that 302 // “imm & kCsrMask<kName>”can be used as 8-bit immediate. 303 as_.Mov<CsrFieldType<kName>>({.base = Assembler::rbp, .disp = kCsrFieldOffset<kName>}, 304 static_cast<int8_t>(imm & kCsrMask<kName>)); 305 } 306 307 template <CsrName kName> SetCsr(Register arg)308 void SetCsr(Register arg) { 309 // Use RCX as temporary register. 310 as_.Mov<CsrFieldType<kName>>(Assembler::rcx, arg); 311 if constexpr (sizeof(CsrFieldType<kName>) <= sizeof(int32_t)) { 312 as_.And<CsrFieldType<kName>>(Assembler::rcx, kCsrMask<kName>); 313 } else { 314 as_.And<CsrFieldType<kName>>(Assembler::rcx, 315 {.disp = constants_pool::kConst<uint64_t{kCsrMask<kName>}>}); 316 } 317 as_.Mov<CsrFieldType<kName>>({.base = Assembler::rbp, .disp = kCsrFieldOffset<kName>}, 318 Assembler::rcx); 319 } 320 GetImm(uint64_t imm)321 [[nodiscard]] Register GetImm(uint64_t imm) { 322 Register imm_reg = AllocTempReg(); 323 as_.Movq(imm_reg, imm); 324 return imm_reg; 325 } 326 Copy(Register value)327 [[nodiscard]] Register Copy(Register value) { 328 Register result = AllocTempReg(); 329 as_.Movq(result, value); 330 return result; 331 } 332 Undefined()333 void Undefined() { success_ = false; } 334 gp_maintainer()335 RegisterFileMaintainer<Register, kNumGuestRegs>* gp_maintainer() { return &gp_maintainer_; } simd_maintainer()336 RegisterFileMaintainer<SimdRegister, kNumGuestFpRegs>* simd_maintainer() { 337 return &simd_maintainer_; 338 } as()339 [[nodiscard]] Assembler* as() { return &as_; } success()340 [[nodiscard]] bool success() const { return success_; } 341 FreeTempRegs()342 void FreeTempRegs() { 343 gp_allocator_.FreeTemps(); 344 simd_allocator_.FreeTemps(); 345 } 346 StoreFpReg(FpRegister value,int32_t offset)347 void StoreFpReg(FpRegister value, int32_t offset) { 348 if (host_platform::kHasAVX) { 349 as_.Vmovsd({.base = Assembler::rbp, .disp = offset}, value); 350 } else { 351 as_.Movsd({.base = Assembler::rbp, .disp = offset}, value); 352 } 353 } 354 MoveFpReg(FpRegister reg,FpRegister value)355 void MoveFpReg(FpRegister reg, FpRegister value) { 356 if (host_platform::kHasAVX) { 357 as_.Vmovsd(reg, value, value); 358 } else { 359 as_.Movsd(reg, value); 360 } 361 } 362 363 #include "berberis/intrinsics/translator_intrinsics_hooks-inl.h" 364 is_region_end_reached()365 bool is_region_end_reached() const { return is_region_end_reached_; } 366 IncrementInsnAddr(uint8_t insn_size)367 void IncrementInsnAddr(uint8_t insn_size) { pc_ += insn_size; } 368 IsRegMappingEnabled()369 bool IsRegMappingEnabled() { return params_.enable_reg_mapping; } 370 GetMappedRegisterOrMap(int reg)371 std::tuple<Register, bool> GetMappedRegisterOrMap(int reg) { 372 if (gp_maintainer_.IsMapped(reg)) { 373 return {gp_maintainer_.GetMapped(reg), false}; 374 } 375 376 if (auto alloc_result = gp_allocator_.Alloc()) { 377 gp_maintainer_.Map(reg, alloc_result.value()); 378 return {alloc_result.value(), true}; 379 } 380 success_ = false; 381 return {{}, false}; 382 } 383 GetMappedFpRegOrMap(int reg)384 std::tuple<SimdRegister, bool> GetMappedFpRegOrMap(int reg) { 385 if (simd_maintainer_.IsMapped(reg)) { 386 return {simd_maintainer_.GetMapped(reg), false}; 387 } 388 389 if (auto alloc_result = simd_allocator_.Alloc()) { 390 simd_maintainer_.Map(reg, alloc_result.value()); 391 return {alloc_result.value(), true}; 392 } 393 success_ = false; 394 return {{}, false}; 395 } 396 AllocTempReg()397 Register AllocTempReg() { 398 if (auto reg_option = gp_allocator_.AllocTemp()) { 399 return reg_option.value(); 400 } 401 success_ = false; 402 return {}; 403 }; 404 AllocTempSimdReg()405 SimdRegister AllocTempSimdReg() { 406 if (auto reg_option = simd_allocator_.AllocTemp()) { 407 return reg_option.value(); 408 } 409 success_ = false; 410 return {}; 411 }; 412 413 template <typename IntType, bool aq, bool rl> Lr(Register)414 Register Lr(Register /* addr */) { 415 Undefined(); 416 return {}; 417 } 418 419 template <typename IntType, bool aq, bool rl> Sc(Register,Register)420 Register Sc(Register /* addr */, Register /* data */) { 421 Undefined(); 422 return {}; 423 } 424 425 private: 426 template <auto kFunction, typename AssemblerResType, typename... AssemblerArgType> CallIntrinsic(AssemblerArgType...args)427 AssemblerResType CallIntrinsic(AssemblerArgType... args) { 428 if constexpr (std::is_same_v<AssemblerResType, void>) { 429 if (inline_intrinsic::TryInlineIntrinsic<kFunction>( 430 as_, 431 [this]() { return AllocTempReg(); }, 432 [this]() { return AllocTempSimdReg(); }, 433 std::monostate{}, 434 args...)) { 435 return; 436 } 437 call_intrinsic::CallIntrinsic<AssemblerResType>(as_, kFunction, args...); 438 } else { 439 AssemblerResType result; 440 if constexpr (std::is_same_v<AssemblerResType, Register>) { 441 result = AllocTempReg(); 442 } else if constexpr (std::is_same_v<AssemblerResType, std::tuple<Register, Register>>) { 443 result = std::tuple{AllocTempReg(), AllocTempReg()}; 444 } else if constexpr (std::is_same_v<AssemblerResType, SimdRegister>) { 445 result = AllocTempSimdReg(); 446 } else { 447 // This should not be reached by the compiler. If it is - there is a new result type that 448 // needs to be supported. 449 static_assert(kDependentTypeFalse<AssemblerResType>, "Unsupported result type"); 450 } 451 452 if (inline_intrinsic::TryInlineIntrinsic<kFunction>( 453 as_, 454 [this]() { return AllocTempReg(); }, 455 [this]() { return AllocTempSimdReg(); }, 456 result, 457 args...)) { 458 return result; 459 } 460 461 call_intrinsic::CallIntrinsic<AssemblerResType>(as_, kFunction, result, args...); 462 463 return result; 464 } 465 } 466 467 Assembler as_; 468 bool success_; 469 GuestAddr pc_; 470 Allocator<Register> gp_allocator_; 471 RegisterFileMaintainer<Register, kNumGuestRegs> gp_maintainer_; 472 RegisterFileMaintainer<SimdRegister, kNumGuestFpRegs> simd_maintainer_; 473 Allocator<SimdRegister> simd_allocator_; 474 const LiteTranslateParams params_; 475 bool is_region_end_reached_; 476 }; 477 478 template <> 479 [[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kCycle>() { 480 return CPUClockCount(); 481 } 482 483 template <> 484 [[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kFCsr>() { 485 Register csr_reg = AllocTempReg(); 486 bool inline_succeful = inline_intrinsic::TryInlineIntrinsic<&intrinsics::FeGetExceptions>( 487 as_, 488 [this]() { return AllocTempReg(); }, 489 [this]() { return AllocTempSimdReg(); }, 490 Assembler::rax); 491 CHECK(inline_succeful); 492 as_.Expand<uint64_t, CsrFieldType<CsrName::kFrm>>( 493 csr_reg, {.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kFrm>}); 494 as_.Shl<uint8_t>(csr_reg, 5); 495 as_.Or<uint8_t>(csr_reg, as_.rax); 496 return csr_reg; 497 } 498 499 template <> 500 [[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kFFlags>() { 501 return FeGetExceptions(); 502 } 503 504 template <> 505 [[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kVlenb>() { 506 return GetImm(16); 507 } 508 509 template <> 510 [[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kVxrm>() { 511 Register reg = AllocTempReg(); 512 as_.Expand<uint64_t, uint8_t>(reg, 513 {.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}); 514 as_.And<uint8_t>(reg, 0b11); 515 return reg; 516 } 517 518 template <> 519 [[nodiscard]] inline LiteTranslator::Register LiteTranslator::GetCsr<CsrName::kVxsat>() { 520 Register reg = AllocTempReg(); 521 as_.Expand<uint64_t, uint8_t>(reg, 522 {.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}); 523 as_.Shr<uint8_t>(reg, 2); 524 return reg; 525 } 526 527 template <> 528 inline void LiteTranslator::SetCsr<CsrName::kFCsr>(uint8_t imm) { 529 // Note: instructions Csrrci or Csrrsi couldn't affect Frm because immediate only has five bits. 530 // But these instruction don't pass their immediate-specified argument into `SetCsr`, they combine 531 // it with register first. Fixing that can only be done by changing code in the semantics player. 532 // 533 // But Csrrwi may clear it. And we actually may only arrive here from Csrrwi. 534 // Thus, technically, we know that imm >> 5 is always zero, but it doesn't look like a good idea 535 // to rely on that: it's very subtle and it only affects code generation speed. 536 as_.Mov<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kFrm>}, 537 static_cast<int8_t>(imm >> 5)); 538 as_.MacroFeSetExceptionsAndRoundImmTranslate( 539 {Assembler::rbp, .disp = static_cast<int>(offsetof(ThreadState, intrinsics_scratch_area))}, 540 imm); 541 } 542 543 template <> 544 inline void LiteTranslator::SetCsr<CsrName::kFCsr>(Register arg) { 545 // Use RAX as temporary register for exceptions and RCX for rm. 546 // We know RCX would be used by FeSetRound, too. 547 as_.Mov<uint8_t>(Assembler::rax, arg); 548 as_.And<uint32_t>(Assembler::rax, 0b1'1111); 549 as_.Shldl(Assembler::rcx, arg, int8_t{32 - 5}); 550 as_.And<uint8_t>(Assembler::rcx, kCsrMask<CsrName::kFrm>); 551 as_.Mov<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kFrm>}, 552 Assembler::rcx); 553 as_.MacroFeSetExceptionsAndRoundTranslate( 554 Assembler::rax, 555 {Assembler::rbp, .disp = static_cast<int>(offsetof(ThreadState, intrinsics_scratch_area))}, 556 Assembler::rax); 557 } 558 559 template <> 560 inline void LiteTranslator::SetCsr<CsrName::kFFlags>(uint8_t imm) { 561 FeSetExceptionsImm(static_cast<int8_t>(imm & 0b1'1111)); 562 } 563 564 template <> 565 inline void LiteTranslator::SetCsr<CsrName::kFFlags>(Register arg) { 566 // Use RAX as temporary register. 567 as_.Mov<uint8_t>(Assembler::rax, arg); 568 as_.And<uint32_t>(Assembler::rax, 0b1'1111); 569 FeSetExceptions(Assembler::rax); 570 } 571 572 template <> 573 inline void LiteTranslator::SetCsr<CsrName::kFrm>(uint8_t imm) { 574 as_.Mov<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kFrm>}, 575 static_cast<int8_t>(imm & kCsrMask<CsrName::kFrm>)); 576 FeSetRoundImm(static_cast<int8_t>(imm & kCsrMask<CsrName::kFrm>)); 577 } 578 579 template <> 580 inline void LiteTranslator::SetCsr<CsrName::kFrm>(Register arg) { 581 // Use RCX as temporary register. We know it would be used by FeSetRound, too. 582 as_.Mov<uint8_t>(Assembler::rcx, arg); 583 as_.And<uint8_t>(Assembler::rcx, kCsrMask<CsrName::kFrm>); 584 as_.Mov<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kFrm>}, 585 Assembler::rcx); 586 FeSetRound(Assembler::rcx); 587 } 588 589 template <> 590 inline void LiteTranslator::SetCsr<CsrName::kVxrm>(uint8_t imm) { 591 imm &= 0b11; 592 if (imm != 0b11) { 593 as_.And<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, 0b100); 594 } 595 if (imm != 0b00) { 596 as_.Or<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, imm); 597 } 598 } 599 600 template <> 601 inline void LiteTranslator::SetCsr<CsrName::kVxrm>(Register arg) { 602 as_.And<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, 0b100); 603 as_.And<uint8_t>(arg, 0b11); 604 as_.Or<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, arg); 605 } 606 607 template <> 608 inline void LiteTranslator::SetCsr<CsrName::kVxsat>(uint8_t imm) { 609 if (imm & 0b1) { 610 as_.Or<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, 0b100); 611 } else { 612 as_.And<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, 0b11); 613 } 614 } 615 616 template <> 617 inline void LiteTranslator::SetCsr<CsrName::kVxsat>(Register arg) { 618 as_.And<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, 0b11); 619 as_.Test<uint8_t>(arg, 1); 620 // Use RCX as temporary register. 621 as_.Setcc(Condition::kNotZero, as_.rcx); 622 as_.Shl<uint8_t>(as_.rcx, int8_t{2}); 623 as_.Or<uint8_t>({.base = Assembler::rbp, .disp = kCsrFieldOffset<CsrName::kVcsr>}, as_.rcx); 624 } 625 626 // There is no NanBoxing for Float64 except on CPUs with Float128 support. 627 template <> 628 inline LiteTranslator::FpRegister LiteTranslator::GetFRegAndUnboxNan<LiteTranslator::Float64>( 629 uint8_t reg) { 630 SimdRegister result = GetFpReg(reg); 631 return result; 632 } 633 634 template <> 635 inline void LiteTranslator::NanBoxFpReg<LiteTranslator::Float64>(FpRegister) {} 636 637 } // namespace berberis 638 639 #endif // BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_H_ 640