/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "frontend.h"

#include <cstddef>

#include "berberis/assembler/x86_64.h"
#include "berberis/backend/common/machine_ir.h"
#include "berberis/backend/x86_64/machine_ir.h"
#include "berberis/base/checks.h"
#include "berberis/base/config.h"
#include "berberis/guest_state/guest_state_arch.h"
#include "berberis/guest_state/guest_state_opaque.h"
#include "berberis/runtime_primitives/memory_region_reservation.h"
#include "berberis/runtime_primitives/platform.h"

namespace berberis {

using BranchOpcode = HeavyOptimizerFrontend::Decoder::BranchOpcode;
using FpRegister = HeavyOptimizerFrontend::FpRegister;
using Register = HeavyOptimizerFrontend::Register;

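// Emit a compare followed by a conditional branch. The then-block jumps to
// the branch target (pc_ + offset); the else-block becomes the fall-through
// for the guest instructions that follow.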
void HeavyOptimizerFrontend::CompareAndBranch(BranchOpcode opcode,
                                              Register arg1,
                                              Register arg2,
                                              int16_t offset) {
  auto ir = builder_.ir();
  auto cur_bb = builder_.bb();
  MachineBasicBlock* then_bb = ir->NewBasicBlock();
  MachineBasicBlock* else_bb = ir->NewBasicBlock();
  ir->AddEdge(cur_bb, then_bb);
  ir->AddEdge(cur_bb, else_bb);

  Gen<x86_64::CmpqRegReg>(arg1, arg2, GetFlagsRegister());
  Gen<PseudoCondBranch>(ToAssemblerCond(opcode), then_bb, else_bb, GetFlagsRegister());

  builder_.StartBasicBlock(then_bb);
  GenJump(pc_ + offset);

  builder_.StartBasicBlock(else_bb);
}

void HeavyOptimizerFrontend::Branch(int32_t offset) {
  is_uncond_branch_ = true;
  GenJump(pc_ + offset);
}

void HeavyOptimizerFrontend::BranchRegister(Register src, int16_t offset) {
  is_uncond_branch_ = true;
  Register target = AllocTempReg();
  Gen<PseudoCopy>(target, src, 8);
  // Avoid the extra insn if unneeded.
  if (offset != 0) {
    Gen<x86_64::AddqRegImm>(target, offset, GetFlagsRegister());
  }
  // TODO(b/232598137) Maybe move this to translation cache?
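  // RISC-V requires indirect branch targets to have bit 0 cleared (JALR
  // semantics); the AND with ~1 below implements that.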
  Gen<x86_64::AndqRegImm>(target, ~int32_t{1}, GetFlagsRegister());
  ExitRegionIndirect(target);
}

x86_64::Assembler::Condition HeavyOptimizerFrontend::ToAssemblerCond(BranchOpcode opcode) {
  switch (opcode) {
    case BranchOpcode::kBeq:
      return x86_64::Assembler::Condition::kEqual;
    case BranchOpcode::kBne:
      return x86_64::Assembler::Condition::kNotEqual;
    case BranchOpcode::kBlt:
      return x86_64::Assembler::Condition::kLess;
    case BranchOpcode::kBge:
      return x86_64::Assembler::Condition::kGreaterEqual;
    case BranchOpcode::kBltu:
      return x86_64::Assembler::Condition::kBelow;
    case BranchOpcode::kBgeu:
      return x86_64::Assembler::Condition::kAboveEqual;
  }
}

Register HeavyOptimizerFrontend::GetImm(uint64_t imm) {
  Register result = AllocTempReg();
  Gen<x86_64::MovqRegImm>(result, imm);
  return result;
}

Register HeavyOptimizerFrontend::AllocTempReg() {
  return builder_.ir()->AllocVReg();
}

SimdReg HeavyOptimizerFrontend::AllocTempSimdReg() {
  return SimdReg{builder_.ir()->AllocVReg()};
}

void HeavyOptimizerFrontend::GenJump(GuestAddr target) {
  auto map_it = branch_targets_.find(target);
  if (map_it == branch_targets_.end()) {
    // Remember that this address is a branch target to help region formation.
    // If we translate it later, the placeholder will be overwritten with the
    // actual location.
    branch_targets_[target] = MachineInsnPosition{};
  }

  // Checking pending signals only on back jumps guarantees that there are no
  // infinite loops without pending-signal checks.
  auto kind = target <= GetInsnAddr() ? PseudoJump::Kind::kJumpWithPendingSignalsCheck
                                      : PseudoJump::Kind::kJumpWithoutPendingSignalsCheck;

  Gen<PseudoJump>(target, kind);
}

void HeavyOptimizerFrontend::ExitGeneratedCode(GuestAddr target) {
  Gen<PseudoJump>(target, PseudoJump::Kind::kExitGeneratedCode);
}

void HeavyOptimizerFrontend::ExitRegionIndirect(Register target) {
  Gen<PseudoIndirectJump>(target);
}

void HeavyOptimizerFrontend::Undefined() {
  success_ = false;
  ExitGeneratedCode(GetInsnAddr());
  // We don't require the region to end here, as control flow may jump around
  // the undefined instruction, so handle it as an unconditional branch.
  is_uncond_branch_ = true;
}

bool HeavyOptimizerFrontend::IsRegionEndReached() const {
  if (!is_uncond_branch_) {
    return false;
  }

  auto map_it = branch_targets_.find(GetInsnAddr());
  // If the instruction following an unconditional branch isn't reachable by
  // some other branch, it's a region end.
  return map_it == branch_targets_.end();
}

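// Rewrite intra-region PseudoJumps into direct branches, splitting target
// basic blocks where a jump lands in the middle of one.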
void HeavyOptimizerFrontend::ResolveJumps() {
  if (!config::kLinkJumpsWithinRegion) {
    return;
  }
  auto ir = builder_.ir();

  MachineBasicBlockList bb_list_copy(ir->bb_list());
  for (auto bb : bb_list_copy) {
    if (bb->is_recovery()) {
      // Recovery blocks must exit the region; do not try to resolve their
      // jumps into local branches.
      continue;
    }

    const MachineInsn* last_insn = bb->insn_list().back();
    if (last_insn->opcode() != kMachineOpPseudoJump) {
      continue;
    }

    auto* jump = static_cast<const PseudoJump*>(last_insn);
    if (jump->kind() == PseudoJump::Kind::kSyscall ||
        jump->kind() == PseudoJump::Kind::kExitGeneratedCode) {
      // A syscall or generated-code exit must always exit the region.
      continue;
    }

    GuestAddr target = jump->target();
    auto map_it = branch_targets_.find(target);
    // All PseudoJump insns must add their targets to branch_targets_.
    CHECK(map_it != branch_targets_.end());

    MachineInsnPosition pos = map_it->second;
    MachineBasicBlock* target_containing_bb = pos.first;
    if (!target_containing_bb) {
      // Branch target is not in the current region.
      continue;
    }

    CHECK(pos.second.has_value());
    auto target_insn_it = pos.second.value();
    MachineBasicBlock* target_bb;
    if (target_insn_it == target_containing_bb->insn_list().begin()) {
      // We don't need to split if target_insn_it is at the beginning of target_containing_bb.
      target_bb = target_containing_bb;
    } else {
      // target_bb is split from target_containing_bb.
      target_bb = ir->SplitBasicBlock(target_containing_bb, target_insn_it);
      UpdateBranchTargetsAfterSplit(target, target_containing_bb, target_bb);

      // Make sure target_bb is also considered for jump resolution. Otherwise we may leave code
      // referenced by it unlinked from the rest of the IR.
      bb_list_copy.push_back(target_bb);

      // If bb is equal to target_containing_bb, then the branch instruction at the end of bb
      // is moved to the new target_bb, so we replace the instruction at the end of the
      // target_bb instead of bb.
      if (bb == target_containing_bb) {
        bb = target_bb;
      }
    }

    ReplaceJumpWithBranch(bb, target_bb);
  }
}

void HeavyOptimizerFrontend::ReplaceJumpWithBranch(MachineBasicBlock* bb,
                                                   MachineBasicBlock* target_bb) {
  auto ir = builder_.ir();
  const auto* last_insn = bb->insn_list().back();
  CHECK_EQ(last_insn->opcode(), kMachineOpPseudoJump);
  auto* jump = static_cast<const PseudoJump*>(last_insn);
  GuestAddr target = jump->target();
  // Do not invalidate this iterator as it may be a target for another jump.
  // Instead overwrite the instruction.
  auto jump_it = std::prev(bb->insn_list().end());

  if (jump->kind() == PseudoJump::Kind::kJumpWithoutPendingSignalsCheck) {
    // Simple branch for a forward jump.
    *jump_it = ir->NewInsn<PseudoBranch>(target_bb);
    ir->AddEdge(bb, target_bb);
  } else {
    CHECK(jump->kind() == PseudoJump::Kind::kJumpWithPendingSignalsCheck);
    // See EmitCheckSignalsAndMaybeReturn.
    auto* exit_bb = ir->NewBasicBlock();
    // Note that we intentionally don't mark exit_bb as recovery and therefore don't request its
    // reordering away from hot code spots. target_bb is a back branch and is unlikely to be a
    // fall-through jump for the current bb. At the same time exit_bb can be a fall-through jump
    // and benchmarks benefit from it.
    const size_t offset = offsetof(ThreadState, pending_signals_status);
    auto* cmpb = ir->NewInsn<x86_64::CmpbMemBaseDispImm>(
        x86_64::kMachineRegRBP, offset, kPendingSignalsPresent, GetFlagsRegister());
    *jump_it = cmpb;
    auto* cond_branch = ir->NewInsn<PseudoCondBranch>(
        x86_64::Assembler::Condition::kEqual, exit_bb, target_bb, GetFlagsRegister());
    bb->insn_list().push_back(cond_branch);

    builder_.StartBasicBlock(exit_bb);
    ExitGeneratedCode(target);

    ir->AddEdge(bb, exit_bb);
    ir->AddEdge(bb, target_bb);
  }
}

void HeavyOptimizerFrontend::UpdateBranchTargetsAfterSplit(GuestAddr addr,
                                                           const MachineBasicBlock* old_bb,
                                                           MachineBasicBlock* new_bb) {
  auto map_it = branch_targets_.find(addr);
  CHECK(map_it != branch_targets_.end());
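  // This relies on branch_targets_ being ordered by guest address: all
  // targets that still point into old_bb at or after addr form a contiguous
  // range starting at map_it.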
  while (map_it != branch_targets_.end() && map_it->second.first == old_bb) {
    map_it->second.first = new_bb;
    map_it++;
  }
}

Register HeavyOptimizerFrontend::GetReg(uint8_t reg) {
  CHECK_LT(reg, kNumGuestRegs);
  Register dst = AllocTempReg();
  builder_.GenGet(dst, GetThreadStateRegOffset(reg));
  return dst;
}

void HeavyOptimizerFrontend::SetReg(uint8_t reg, Register value) {
  CHECK_LT(reg, kNumGuestRegs);
  if (success()) {
    builder_.GenPut(GetThreadStateRegOffset(reg), value);
  }
}

FpRegister HeavyOptimizerFrontend::GetFpReg(uint8_t reg) {
  FpRegister result = AllocTempSimdReg();
  builder_.GenGetSimd<8>(result.machine_reg(), GetThreadStateFRegOffset(reg));
  return result;
}

void HeavyOptimizerFrontend::Nop() {}

Register HeavyOptimizerFrontend::Op(Decoder::OpOpcode opcode, Register arg1, Register arg2) {
  using OpOpcode = Decoder::OpOpcode;
  using Condition = x86_64::Assembler::Condition;
  auto res = AllocTempReg();
  switch (opcode) {
    case OpOpcode::kAdd:
      Gen<PseudoCopy>(res, arg1, 8);
      Gen<x86_64::AddqRegReg>(res, arg2, GetFlagsRegister());
      break;
    case OpOpcode::kSub:
      Gen<PseudoCopy>(res, arg1, 8);
      Gen<x86_64::SubqRegReg>(res, arg2, GetFlagsRegister());
      break;
    case OpOpcode::kAnd:
      Gen<PseudoCopy>(res, arg1, 8);
      Gen<x86_64::AndqRegReg>(res, arg2, GetFlagsRegister());
      break;
    case OpOpcode::kOr:
      Gen<PseudoCopy>(res, arg1, 8);
      Gen<x86_64::OrqRegReg>(res, arg2, GetFlagsRegister());
      break;
    case OpOpcode::kXor:
      Gen<PseudoCopy>(res, arg1, 8);
      Gen<x86_64::XorqRegReg>(res, arg2, GetFlagsRegister());
      break;
    case OpOpcode::kSll:
      Gen<PseudoCopy>(res, arg1, 8);
      Gen<x86_64::ShlqRegReg>(res, arg2, GetFlagsRegister());
      break;
    case OpOpcode::kSrl:
      Gen<PseudoCopy>(res, arg1, 8);
      Gen<x86_64::ShrqRegReg>(res, arg2, GetFlagsRegister());
      break;
    case OpOpcode::kSra:
      Gen<PseudoCopy>(res, arg1, 8);
      Gen<x86_64::SarqRegReg>(res, arg2, GetFlagsRegister());
      break;
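    // SLT/SLTU: SETcc produces a byte, so materialize the comparison result
    // in a byte-sized temp and zero-extend it into res.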
    case OpOpcode::kSlt: {
      Gen<x86_64::CmpqRegReg>(arg1, arg2, GetFlagsRegister());
      auto temp = AllocTempReg();
      Gen<x86_64::SetccReg>(Condition::kLess, temp, GetFlagsRegister());
      Gen<x86_64::MovzxbqRegReg>(res, temp);
      break;
    }
    case OpOpcode::kSltu: {
      Gen<x86_64::CmpqRegReg>(arg1, arg2, GetFlagsRegister());
      auto temp = AllocTempReg();
      Gen<x86_64::SetccReg>(Condition::kBelow, temp, GetFlagsRegister());
      Gen<x86_64::MovzxbqRegReg>(res, temp);
      break;
    }
    case OpOpcode::kMul:
      Gen<PseudoCopy>(res, arg1, 8);
      Gen<x86_64::ImulqRegReg>(res, arg2, GetFlagsRegister());
      break;
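    // MULH: the widening signed multiply leaves the 128-bit product in the
    // rdx:rax pair (hence the temp names); the high half is the result.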
    case OpOpcode::kMulh: {
      auto rax = AllocTempReg();
      auto rdx = AllocTempReg();
      Gen<PseudoCopy>(rax, arg1, 8);
      Gen<x86_64::ImulqRegRegReg>(rax, rdx, arg2, GetFlagsRegister());
      Gen<PseudoCopy>(res, rdx, 8);
    } break;
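    // MULHSU (signed x unsigned): compute the unsigned high half, then
    // correct it, since mulhsu(a, b) == mulhu(a, b) - (a < 0 ? b : 0). The
    // arithmetic shift by 63 turns the sign of arg1 into an all-ones or
    // all-zeroes mask for the correction term.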
    case OpOpcode::kMulhsu: {
      Gen<PseudoCopy>(res, arg1, 8);
      auto rax = AllocTempReg();
      auto rdx = AllocTempReg();
      Gen<PseudoCopy>(rax, arg2, 8);
      Gen<x86_64::MulqRegRegReg>(rax, rdx, res, GetFlagsRegister());
      Gen<x86_64::SarqRegImm>(res, 63, GetFlagsRegister());
      Gen<x86_64::ImulqRegReg>(res, arg2, GetFlagsRegister());
      Gen<x86_64::AddqRegReg>(res, rdx, GetFlagsRegister());
    } break;
    case OpOpcode::kMulhu: {
      auto rax = AllocTempReg();
      auto rdx = AllocTempReg();
      Gen<PseudoCopy>(rax, arg1, 8);
      Gen<x86_64::MulqRegRegReg>(rax, rdx, arg2, GetFlagsRegister());
      Gen<PseudoCopy>(res, rdx, 8);
    } break;
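    // ANDN (res = arg1 & ~arg2): use the single BMI ANDN insn when the host
    // supports it; otherwise emulate with NOT + AND.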
    case OpOpcode::kAndn:
      if (host_platform::kHasBMI) {
        Gen<x86_64::AndnqRegRegReg>(res, arg2, arg1, GetFlagsRegister());
      } else {
        Gen<PseudoCopy>(res, arg2, 8);
        Gen<x86_64::NotqReg>(res);
        Gen<x86_64::AndqRegReg>(res, arg1, GetFlagsRegister());
      }
      break;
    case OpOpcode::kOrn:
      Gen<PseudoCopy>(res, arg2, 8);
      Gen<x86_64::NotqReg>(res);
      Gen<x86_64::OrqRegReg>(res, arg1, GetFlagsRegister());
      break;
    case OpOpcode::kXnor:
      Gen<PseudoCopy>(res, arg2, 8);
      Gen<x86_64::XorqRegReg>(res, arg1, GetFlagsRegister());
      Gen<x86_64::NotqReg>(res);
      break;
    default:
      Undefined();
      return {};
  }

  return res;
}

Register HeavyOptimizerFrontend::Op32(Decoder::Op32Opcode opcode, Register arg1, Register arg2) {
  using Op32Opcode = Decoder::Op32Opcode;
  auto res = AllocTempReg();
  auto unextended_res = res;
  switch (opcode) {
    case Op32Opcode::kAddw:
      Gen<PseudoCopy>(res, arg1, 4);
      Gen<x86_64::AddlRegReg>(res, arg2, GetFlagsRegister());
      break;
    case Op32Opcode::kSubw:
      Gen<PseudoCopy>(res, arg1, 4);
      Gen<x86_64::SublRegReg>(res, arg2, GetFlagsRegister());
      break;
    case Op32Opcode::kSllw:
    case Op32Opcode::kSrlw:
    case Op32Opcode::kSraw: {
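      // x86 variable shifts take the count in CL, hence the temp named rcx;
      // register allocation is expected to place it there.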
      auto rcx = AllocTempReg();
      Gen<PseudoCopy>(res, arg1, 4);
      Gen<PseudoCopy>(rcx, arg2, 4);
      if (opcode == Op32Opcode::kSllw) {
        Gen<x86_64::ShllRegReg>(res, rcx, GetFlagsRegister());
      } else if (opcode == Op32Opcode::kSrlw) {
        Gen<x86_64::ShrlRegReg>(res, rcx, GetFlagsRegister());
      } else {
        Gen<x86_64::SarlRegReg>(res, rcx, GetFlagsRegister());
      }
    } break;
    case Op32Opcode::kMulw:
      Gen<PseudoCopy>(res, arg1, 4);
      Gen<x86_64::ImullRegReg>(res, arg2, GetFlagsRegister());
      break;
    default:
      Undefined();
      return {};
  }
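  // RV64 requires 32-bit results to be sign-extended to the full 64-bit
  // register.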
  Gen<x86_64::MovsxlqRegReg>(res, unextended_res);
  return res;
}

Register HeavyOptimizerFrontend::OpImm(Decoder::OpImmOpcode opcode, Register arg, int16_t imm) {
  using OpImmOpcode = Decoder::OpImmOpcode;
  using Condition = x86_64::Assembler::Condition;
  auto res = AllocTempReg();
  switch (opcode) {
    case OpImmOpcode::kAddi:
      Gen<PseudoCopy>(res, arg, 8);
      Gen<x86_64::AddqRegImm>(res, imm, GetFlagsRegister());
      break;
    case OpImmOpcode::kSlti: {
      auto temp = AllocTempReg();
      Gen<x86_64::CmpqRegImm>(arg, imm, GetFlagsRegister());
      Gen<x86_64::SetccReg>(Condition::kLess, temp, GetFlagsRegister());
      Gen<x86_64::MovsxbqRegReg>(res, temp);
    } break;
    case OpImmOpcode::kSltiu: {
      auto temp = AllocTempReg();
      Gen<x86_64::CmpqRegImm>(arg, imm, GetFlagsRegister());
      Gen<x86_64::SetccReg>(Condition::kBelow, temp, GetFlagsRegister());
      Gen<x86_64::MovsxbqRegReg>(res, temp);
    } break;
    case OpImmOpcode::kXori:
      Gen<PseudoCopy>(res, arg, 8);
      Gen<x86_64::XorqRegImm>(res, imm, GetFlagsRegister());
      break;
    case OpImmOpcode::kOri:
      Gen<PseudoCopy>(res, arg, 8);
      Gen<x86_64::OrqRegImm>(res, imm, GetFlagsRegister());
      break;
    case OpImmOpcode::kAndi:
      Gen<PseudoCopy>(res, arg, 8);
      Gen<x86_64::AndqRegImm>(res, imm, GetFlagsRegister());
      break;
    default:
      Undefined();
      return {};
  }
  return res;
}

Register HeavyOptimizerFrontend::OpImm32(Decoder::OpImm32Opcode opcode, Register arg, int16_t imm) {
  auto res = AllocTempReg();
  switch (opcode) {
    case Decoder::OpImm32Opcode::kAddiw:
      Gen<PseudoCopy>(res, arg, 4);
      Gen<x86_64::AddlRegImm>(res, imm, GetFlagsRegister());
      Gen<x86_64::MovsxlqRegReg>(res, res);
      break;
    default:
      Undefined();
      return {};
  }
  return res;
}

Register HeavyOptimizerFrontend::Slli(Register arg, int8_t imm) {
  auto res = AllocTempReg();
  Gen<PseudoCopy>(res, arg, 8);
  Gen<x86_64::ShlqRegImm>(res, imm, GetFlagsRegister());
  return res;
}

Register HeavyOptimizerFrontend::Srli(Register arg, int8_t imm) {
  auto res = AllocTempReg();
  Gen<PseudoCopy>(res, arg, 8);
  Gen<x86_64::ShrqRegImm>(res, imm, GetFlagsRegister());
  return res;
}

Register HeavyOptimizerFrontend::Srai(Register arg, int8_t imm) {
  auto res = AllocTempReg();
  Gen<PseudoCopy>(res, arg, 8);
  Gen<x86_64::SarqRegImm>(res, imm, GetFlagsRegister());
  return res;
}

Register HeavyOptimizerFrontend::ShiftImm32(Decoder::ShiftImm32Opcode opcode,
                                            Register arg,
                                            uint16_t imm) {
  using ShiftImm32Opcode = Decoder::ShiftImm32Opcode;
  auto res = AllocTempReg();
  auto rcx = AllocTempReg();
  Gen<PseudoCopy>(res, arg, 4);
  Gen<x86_64::MovlRegImm>(rcx, imm);
  switch (opcode) {
    case ShiftImm32Opcode::kSlliw:
      Gen<x86_64::ShllRegReg>(res, rcx, GetFlagsRegister());
      break;
    case ShiftImm32Opcode::kSrliw:
      Gen<x86_64::ShrlRegReg>(res, rcx, GetFlagsRegister());
      break;
    case ShiftImm32Opcode::kSraiw:
      Gen<x86_64::SarlRegReg>(res, rcx, GetFlagsRegister());
      break;
    default:
      Undefined();
      break;
  }
  Gen<x86_64::MovsxlqRegReg>(res, res);
  return res;
}

Register HeavyOptimizerFrontend::Rori(Register arg, int8_t shamt) {
  auto res = AllocTempReg();
  Gen<PseudoCopy>(res, arg, 8);
  Gen<x86_64::RorqRegImm>(res, shamt, GetFlagsRegister());
  return res;
}

Register HeavyOptimizerFrontend::Roriw(Register arg, int8_t shamt) {
  auto res = AllocTempReg();
  Gen<PseudoCopy>(res, arg, 8);
  Gen<x86_64::RorlRegImm>(res, shamt, GetFlagsRegister());
  Gen<x86_64::MovsxlqRegReg>(res, res);
  return res;
}

Register HeavyOptimizerFrontend::Lui(int32_t imm) {
  auto res = AllocTempReg();
  Gen<x86_64::MovlRegImm>(res, imm);
  Gen<x86_64::MovsxlqRegReg>(res, res);
  return res;
}

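// AUIPC: the address of the current instruction plus the sign-extended
// 32-bit immediate.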
Register HeavyOptimizerFrontend::Auipc(int32_t imm) {
  auto res = GetImm(GetInsnAddr());
  auto temp = AllocTempReg();
  Gen<x86_64::MovlRegImm>(temp, imm);
  Gen<x86_64::MovsxlqRegReg>(temp, temp);
  Gen<x86_64::AddqRegReg>(res, temp, GetFlagsRegister());
  return res;
}

void HeavyOptimizerFrontend::Store(Decoder::MemoryDataOperandType operand_type,
                                   Register arg,
                                   int16_t offset,
                                   Register data) {
  int32_t sx_offset{offset};
  StoreWithoutRecovery(operand_type, arg, sx_offset, data);
  GenRecoveryBlockForLastInsn();
}

Register HeavyOptimizerFrontend::Load(Decoder::LoadOperandType operand_type,
                                      Register arg,
                                      int16_t offset) {
  int32_t sx_offset{offset};
  auto res = LoadWithoutRecovery(operand_type, arg, sx_offset);
  GenRecoveryBlockForLastInsn();
  return res;
}

void HeavyOptimizerFrontend::GenRecoveryBlockForLastInsn() {
  // TODO(b/311240558) Accurate Sigsegv?
  auto* ir = builder_.ir();
  auto* current_bb = builder_.bb();
  auto* continue_bb = ir->NewBasicBlock();
  auto* recovery_bb = ir->NewBasicBlock();
  ir->AddEdge(current_bb, continue_bb);
  ir->AddEdge(current_bb, recovery_bb);

  builder_.SetRecoveryPointAtLastInsn(recovery_bb);

  // Note that even though there are two bb successors, we only explicitly
  // branch to continue_bb, since the jump to recovery_bb is set up by the
  // signal handler.
  Gen<PseudoBranch>(continue_bb);

  builder_.StartBasicBlock(recovery_bb);
  ExitGeneratedCode(GetInsnAddr());

  builder_.StartBasicBlock(continue_bb);
}

//
//  Methods that are not part of SemanticsListener implementation.
//
void HeavyOptimizerFrontend::StartInsn() {
  if (is_uncond_branch_) {
    auto* ir = builder_.ir();
    builder_.StartBasicBlock(ir->NewBasicBlock());
  }

  is_uncond_branch_ = false;
  // The position stored in branch_targets_ points at the last insn emitted
  // before this guest instruction. We advance it by one step in Finalize(),
  // so that it designates the first insn generated for the given guest
  // address.

  // If the basic block is empty before generating the insn, an empty optional
  // is stored. We resolve it to the first insn of the basic block in
  // Finalize().
  branch_targets_[GetInsnAddr()] = builder_.GetMachineInsnPosition();
}

void HeavyOptimizerFrontend::Finalize(GuestAddr stop_pc) {
  // Make sure the last basic block isn't empty before fixing iterators in
  // branch_targets_.
  if (builder_.bb()->insn_list().empty() ||
      !builder_.ir()->IsControlTransfer(builder_.bb()->insn_list().back())) {
    GenJump(stop_pc);
  }

  // This loop advances the iterators in branch_targets_ by one, because in
  // StartInsn() we saved the iterator to the last insn emitted before the
  // first insn for each guest address. If a position was saved as an empty
  // optional, the basic block was empty at that point, so we resolve it to
  // the first insn in the basic block.
  for (auto& [unused_address, insn_pos] : branch_targets_) {
    auto& [bb, insn_it] = insn_pos;
    if (!bb) {
      // Branch target is not in the current region.
      continue;
    }

    if (insn_it.has_value()) {
      insn_it.value()++;
    } else {
      // Make sure bb isn't still empty.
      CHECK(!bb->insn_list().empty());
      insn_it = bb->insn_list().begin();
    }
  }

  ResolveJumps();
}

Register HeavyOptimizerFrontend::LoadWithoutRecovery(Decoder::LoadOperandType operand_type,
                                                     Register base,
                                                     int32_t disp) {
  auto res = AllocTempReg();
  switch (operand_type) {
    case Decoder::LoadOperandType::k8bitUnsigned:
      Gen<x86_64::MovzxblRegMemBaseDisp>(res, base, disp);
      break;
    case Decoder::LoadOperandType::k16bitUnsigned:
      Gen<x86_64::MovzxwlRegMemBaseDisp>(res, base, disp);
      break;
    case Decoder::LoadOperandType::k32bitUnsigned:
      Gen<x86_64::MovlRegMemBaseDisp>(res, base, disp);
      break;
    case Decoder::LoadOperandType::k64bit:
      Gen<x86_64::MovqRegMemBaseDisp>(res, base, disp);
      break;
    case Decoder::LoadOperandType::k8bitSigned:
      Gen<x86_64::MovsxbqRegMemBaseDisp>(res, base, disp);
      break;
    case Decoder::LoadOperandType::k16bitSigned:
      Gen<x86_64::MovsxwqRegMemBaseDisp>(res, base, disp);
      break;
    case Decoder::LoadOperandType::k32bitSigned:
      Gen<x86_64::MovsxlqRegMemBaseDisp>(res, base, disp);
      break;
    default:
      Undefined();
      return {};
  }

  return res;
}

Register HeavyOptimizerFrontend::LoadWithoutRecovery(Decoder::LoadOperandType operand_type,
                                                     Register base,
                                                     Register index,
                                                     int32_t disp) {
  auto res = AllocTempReg();
  switch (operand_type) {
    case Decoder::LoadOperandType::k8bitUnsigned:
      Gen<x86_64::MovzxblRegMemBaseIndexDisp>(
          res, base, index, x86_64::MachineMemOperandScale::kOne, disp);
      break;
    case Decoder::LoadOperandType::k16bitUnsigned:
      Gen<x86_64::MovzxwlRegMemBaseIndexDisp>(
          res, base, index, x86_64::MachineMemOperandScale::kOne, disp);
      break;
    case Decoder::LoadOperandType::k32bitUnsigned:
      Gen<x86_64::MovlRegMemBaseIndexDisp>(
          res, base, index, x86_64::MachineMemOperandScale::kOne, disp);
      break;
    case Decoder::LoadOperandType::k64bit:
      Gen<x86_64::MovqRegMemBaseIndexDisp>(
          res, base, index, x86_64::MachineMemOperandScale::kOne, disp);
      break;
    case Decoder::LoadOperandType::k8bitSigned:
      Gen<x86_64::MovsxbqRegMemBaseIndexDisp>(
          res, base, index, x86_64::MachineMemOperandScale::kOne, disp);
      break;
    case Decoder::LoadOperandType::k16bitSigned:
      Gen<x86_64::MovsxwqRegMemBaseIndexDisp>(
          res, base, index, x86_64::MachineMemOperandScale::kOne, disp);
      break;
    case Decoder::LoadOperandType::k32bitSigned:
      Gen<x86_64::MovsxlqRegMemBaseIndexDisp>(
          res, base, index, x86_64::MachineMemOperandScale::kOne, disp);
      break;
    default:
      Undefined();
      return {};
  }
  return res;
}

Register HeavyOptimizerFrontend::UpdateCsr(Decoder::CsrOpcode opcode, Register arg, Register csr) {
  Register res = AllocTempReg();
  switch (opcode) {
    case Decoder::CsrOpcode::kCsrrs:
      Gen<PseudoCopy>(res, arg, 8);
      Gen<x86_64::OrqRegReg>(res, csr, GetFlagsRegister());
      break;
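    // CSRRC clears the CSR bits set in arg: BMI ANDN computes
    // res = ~arg & csr in one insn; otherwise emulate with NOT + AND.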
    case Decoder::CsrOpcode::kCsrrc:
      if (host_platform::kHasBMI) {
        Gen<x86_64::AndnqRegRegReg>(res, arg, csr, GetFlagsRegister());
      } else {
        Gen<PseudoCopy>(res, arg, 8);
        Gen<x86_64::NotqReg>(res);
        Gen<x86_64::AndqRegReg>(res, csr, GetFlagsRegister());
      }
      break;
    default:
      Undefined();
      return {};
  }
  return res;
}

Register HeavyOptimizerFrontend::UpdateCsr(Decoder::CsrImmOpcode opcode,
                                           uint8_t imm,
                                           Register csr) {
  Register res = AllocTempReg();
  switch (opcode) {
    case Decoder::CsrImmOpcode::kCsrrwi:
      Gen<x86_64::MovlRegImm>(res, imm);
      break;
    case Decoder::CsrImmOpcode::kCsrrsi:
      Gen<x86_64::MovlRegImm>(res, imm);
      Gen<x86_64::OrqRegReg>(res, csr, GetFlagsRegister());
      break;
    case Decoder::CsrImmOpcode::kCsrrci:
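      // imm is a 5-bit zimm, so ~imm has bits 5-7 set; sign-extending it as
      // int8_t yields a 64-bit mask with every bit above the immediate set,
      // which is exactly what the AND below needs to clear only imm's bits.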
      Gen<x86_64::MovqRegImm>(res, static_cast<int8_t>(~imm));
      Gen<x86_64::AndqRegReg>(res, csr, GetFlagsRegister());
      break;
    default:
      Undefined();
      return {};
  }
  return res;
}

void HeavyOptimizerFrontend::StoreWithoutRecovery(Decoder::MemoryDataOperandType operand_type,
                                                  Register base,
                                                  int32_t disp,
                                                  Register data) {
  switch (operand_type) {
    case Decoder::MemoryDataOperandType::k8bit:
      Gen<x86_64::MovbMemBaseDispReg>(base, disp, data);
      break;
    case Decoder::MemoryDataOperandType::k16bit:
      Gen<x86_64::MovwMemBaseDispReg>(base, disp, data);
      break;
    case Decoder::MemoryDataOperandType::k32bit:
      Gen<x86_64::MovlMemBaseDispReg>(base, disp, data);
      break;
    case Decoder::MemoryDataOperandType::k64bit:
      Gen<x86_64::MovqMemBaseDispReg>(base, disp, data);
      break;
    default:
      return Undefined();
  }
}

void HeavyOptimizerFrontend::StoreWithoutRecovery(Decoder::MemoryDataOperandType operand_type,
                                                  Register base,
                                                  Register index,
                                                  int32_t disp,
                                                  Register data) {
  switch (operand_type) {
    case Decoder::MemoryDataOperandType::k8bit:
      Gen<x86_64::MovbMemBaseIndexDispReg>(
          base, index, x86_64::MachineMemOperandScale::kOne, disp, data);
      break;
    case Decoder::MemoryDataOperandType::k16bit:
      Gen<x86_64::MovwMemBaseIndexDispReg>(
          base, index, x86_64::MachineMemOperandScale::kOne, disp, data);
      break;
    case Decoder::MemoryDataOperandType::k32bit:
      Gen<x86_64::MovlMemBaseIndexDispReg>(
          base, index, x86_64::MachineMemOperandScale::kOne, disp, data);
      break;
    case Decoder::MemoryDataOperandType::k64bit:
      Gen<x86_64::MovqMemBaseIndexDispReg>(
          base, index, x86_64::MachineMemOperandScale::kOne, disp, data);
      break;
    default:
      return Undefined();
  }
}

// Ordering that affects I/O devices is not relevant to user-space code, so we
// simply ignore the bits related to device I/O.
void HeavyOptimizerFrontend::Fence(Decoder::FenceOpcode /* opcode */,
                                   Register /* src */,
                                   bool sw,
                                   bool sr,
                                   bool /* so */,
                                   bool /* si */,
                                   bool pw,
                                   bool pr,
                                   bool /* po */,
                                   bool /* pi */) {
  // The two types of fences (total-store-ordering fence and normal fence) are
  // supposed to be processed differently, but only in the “read fence &&
  // write fence” case (otherwise a total-store-ordering fence becomes a
  // normal fence for “forward compatibility”). Since x86 doesn't distinguish
  // between these two types of fences, and since we are supposed to map all
  // not-yet-defined fences to the normal fence (again, for “forward
  // compatibility”), it's OK to just ignore the opcode field.
  bool read_fence = sr | pr;
  bool write_fence = sw | pw;
  if (read_fence) {
    if (write_fence) {
      Gen<x86_64::Mfence>();
    } else {
      Gen<x86_64::Lfence>();
    }
  } else if (write_fence) {
    Gen<x86_64::Sfence>();
  }
}

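// Emulates the load-reserved half of LR/SC: record the reservation address
// and value in CPUState and register this CPU as the reservation owner.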
void HeavyOptimizerFrontend::MemoryRegionReservationLoad(Register aligned_addr) {
  // Store aligned_addr in CPUState.
  int32_t address_offset = GetThreadStateReservationAddressOffset();
  Gen<x86_64::MovqMemBaseDispReg>(x86_64::kMachineRegRBP, address_offset, aligned_addr);

  // MemoryRegionReservation::SetOwner(aligned_addr, &(state->cpu)).
  builder_.GenCallImm(bit_cast<uint64_t>(&MemoryRegionReservation::SetOwner),
                      GetFlagsRegister(),
                      std::array<x86_64::CallImm::Arg, 2>{{
                          {aligned_addr, x86_64::CallImm::kIntRegType},
                          {x86_64::kMachineRegRBP, x86_64::CallImm::kIntRegType},
                      }});

  // Load reservation value and store it in CPUState.
  auto reservation = AllocTempReg();
  Gen<x86_64::MovqRegMemBaseDisp>(reservation, aligned_addr, 0);
  int32_t value_offset = GetThreadStateReservationValueOffset();
  Gen<x86_64::MovqMemBaseDispReg>(x86_64::kMachineRegRBP, value_offset, reservation);
}

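// Emulates the store-conditional half of LR/SC. Returns 0 in the result
// register on success and 1 on failure, matching RISC-V SC semantics.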
Register HeavyOptimizerFrontend::MemoryRegionReservationExchange(Register aligned_addr,
                                                                 Register curr_reservation_value) {
  auto* ir = builder_.ir();
  auto* cur_bb = builder_.bb();
  auto* addr_match_bb = ir->NewBasicBlock();
  auto* failure_bb = ir->NewBasicBlock();
  auto* continue_bb = ir->NewBasicBlock();
  ir->AddEdge(cur_bb, addr_match_bb);
  ir->AddEdge(cur_bb, failure_bb);
  ir->AddEdge(failure_bb, continue_bb);
  Register result = AllocTempReg();

  // MemoryRegionReservation::Clear.
  Register stored_aligned_addr = AllocTempReg();
  int32_t address_offset = GetThreadStateReservationAddressOffset();
  Gen<x86_64::MovqRegMemBaseDisp>(stored_aligned_addr, x86_64::kMachineRegRBP, address_offset);
  Gen<x86_64::MovqMemBaseDispImm>(x86_64::kMachineRegRBP, address_offset, kNullGuestAddr);
  // Compare aligned_addr to the one in CPUState.
  Gen<x86_64::CmpqRegReg>(stored_aligned_addr, aligned_addr, GetFlagsRegister());
  Gen<PseudoCondBranch>(
      x86_64::Assembler::Condition::kNotEqual, failure_bb, addr_match_bb, GetFlagsRegister());

  builder_.StartBasicBlock(addr_match_bb);
  // Load the new reservation value into the integer register where CmpXchgq expects it.
  Register new_reservation_value = AllocTempReg();
  int32_t value_offset = GetThreadStateReservationValueOffset();
  Gen<x86_64::MovqRegMemBaseDisp>(new_reservation_value, x86_64::kMachineRegRBP, value_offset);

  MemoryRegionReservationSwapWithLockedOwner(
      aligned_addr, curr_reservation_value, new_reservation_value, failure_bb);

  ir->AddEdge(builder_.bb(), continue_bb);
  // Pseudo-def for the use-def operand of XOR to make sure the data-flow is
  // consistent.
  Gen<PseudoDefReg>(result);
  Gen<x86_64::XorqRegReg>(result, result, GetFlagsRegister());
  Gen<PseudoBranch>(continue_bb);

  builder_.StartBasicBlock(failure_bb);
  Gen<x86_64::MovqRegImm>(result, 1);
  Gen<PseudoBranch>(continue_bb);

  builder_.StartBasicBlock(continue_bb);

  return result;
}

void HeavyOptimizerFrontend::MemoryRegionReservationSwapWithLockedOwner(
    Register aligned_addr,
    Register curr_reservation_value,
    Register new_reservation_value,
    MachineBasicBlock* failure_bb) {
  auto* ir = builder_.ir();
  auto* lock_success_bb = ir->NewBasicBlock();
  auto* swap_success_bb = ir->NewBasicBlock();
  ir->AddEdge(builder_.bb(), lock_success_bb);
  ir->AddEdge(builder_.bb(), failure_bb);
  ir->AddEdge(lock_success_bb, swap_success_bb);
  ir->AddEdge(lock_success_bb, failure_bb);

  // lock_entry = MemoryRegionReservation::TryLock(aligned_addr, &(state->cpu)).
  auto* call = builder_.GenCallImm(bit_cast<uint64_t>(&MemoryRegionReservation::TryLock),
                                   GetFlagsRegister(),
                                   std::array<x86_64::CallImm::Arg, 2>{{
                                       {aligned_addr, x86_64::CallImm::kIntRegType},
                                       {x86_64::kMachineRegRBP, x86_64::CallImm::kIntRegType},
                                   }});
  Register lock_entry = AllocTempReg();
  // Limit the life-time of the narrow reg-class call result.
  Gen<PseudoCopy>(lock_entry, call->IntResultAt(0), 8);
  Gen<x86_64::TestqRegReg>(lock_entry, lock_entry, GetFlagsRegister());
  Gen<PseudoCondBranch>(
      x86_64::Assembler::Condition::kZero, failure_bb, lock_success_bb, GetFlagsRegister());

  builder_.StartBasicBlock(lock_success_bb);
  auto rax = AllocTempReg();
  Gen<PseudoCopy>(rax, curr_reservation_value, 8);
  Gen<x86_64::LockCmpXchgqRegMemBaseDispReg>(
      rax, aligned_addr, 0, new_reservation_value, GetFlagsRegister());

  // MemoryRegionReservation::Unlock(lock_entry).
  Gen<x86_64::MovqMemBaseDispImm>(lock_entry, 0, 0);
  // The zero flag is set if CmpXchg is successful.
  Gen<PseudoCondBranch>(
      x86_64::Assembler::Condition::kNotZero, failure_bb, swap_success_bb, GetFlagsRegister());

  builder_.StartBasicBlock(swap_success_bb);
}

}  // namespace berberis