/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "frontend.h"

#include <cstddef>

#include "berberis/assembler/x86_64.h"
#include "berberis/backend/common/machine_ir.h"
#include "berberis/backend/x86_64/machine_ir.h"
#include "berberis/base/checks.h"
#include "berberis/base/config.h"
#include "berberis/guest_state/guest_state_arch.h"
#include "berberis/guest_state/guest_state_opaque.h"
#include "berberis/runtime_primitives/memory_region_reservation.h"
#include "berberis/runtime_primitives/platform.h"

namespace berberis {

using BranchOpcode = HeavyOptimizerFrontend::Decoder::BranchOpcode;
using FpRegister = HeavyOptimizerFrontend::FpRegister;
using Register = HeavyOptimizerFrontend::Register;

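// Translates a conditional guest branch: compare the two operands, transfer control to then_bb
// (which jumps to pc_ + offset) when the condition holds, and continue translation in else_bb
// otherwise.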
void HeavyOptimizerFrontend::CompareAndBranch(BranchOpcode opcode,
                                              Register arg1,
                                              Register arg2,
                                              int16_t offset) {
  auto ir = builder_.ir();
  auto cur_bb = builder_.bb();
  MachineBasicBlock* then_bb = ir->NewBasicBlock();
  MachineBasicBlock* else_bb = ir->NewBasicBlock();
  ir->AddEdge(cur_bb, then_bb);
  ir->AddEdge(cur_bb, else_bb);

  Gen<x86_64::CmpqRegReg>(arg1, arg2, GetFlagsRegister());
  Gen<PseudoCondBranch>(ToAssemblerCond(opcode), then_bb, else_bb, GetFlagsRegister());

  builder_.StartBasicBlock(then_bb);
  GenJump(pc_ + offset);

  builder_.StartBasicBlock(else_bb);
}

void HeavyOptimizerFrontend::Branch(int32_t offset) {
  is_uncond_branch_ = true;
  GenJump(pc_ + offset);
}

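// Indirect branch through a guest register (JALR-style). The target is copied into a temporary,
// the immediate offset is added, and the least-significant bit is cleared, as RISC-V JALR
// semantics require, before leaving the region through an indirect exit.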
void HeavyOptimizerFrontend::BranchRegister(Register src, int16_t offset) {
  is_uncond_branch_ = true;
  Register target = AllocTempReg();
  Gen<PseudoCopy>(target, src, 8);
  // Avoid the extra insn if unneeded.
  if (offset != 0) {
    Gen<x86_64::AddqRegImm>(target, offset, GetFlagsRegister());
  }
  // TODO(b/232598137) Maybe move this to translation cache?
  Gen<x86_64::AndqRegImm>(target, ~int32_t{1}, GetFlagsRegister());
  ExitRegionIndirect(target);
}

x86_64::Assembler::Condition HeavyOptimizerFrontend::ToAssemblerCond(BranchOpcode opcode) {
  switch (opcode) {
    case BranchOpcode::kBeq:
      return x86_64::Assembler::Condition::kEqual;
    case BranchOpcode::kBne:
      return x86_64::Assembler::Condition::kNotEqual;
    case BranchOpcode::kBlt:
      return x86_64::Assembler::Condition::kLess;
    case BranchOpcode::kBge:
      return x86_64::Assembler::Condition::kGreaterEqual;
    case BranchOpcode::kBltu:
      return x86_64::Assembler::Condition::kBelow;
    case BranchOpcode::kBgeu:
      return x86_64::Assembler::Condition::kAboveEqual;
  }
}

Register HeavyOptimizerFrontend::GetImm(uint64_t imm) {
  Register result = AllocTempReg();
  Gen<x86_64::MovqRegImm>(result, imm);
  return result;
}

Register HeavyOptimizerFrontend::AllocTempReg() {
  return builder_.ir()->AllocVReg();
}

SimdReg HeavyOptimizerFrontend::AllocTempSimdReg() {
  return SimdReg{builder_.ir()->AllocVReg()};
}

void HeavyOptimizerFrontend::GenJump(GuestAddr target) {
  auto map_it = branch_targets_.find(target);
  if (map_it == branch_targets_.end()) {
    // Remember that this address was taken to help region formation. If we
    // translate it later, the data will be overwritten with the actual location.
    branch_targets_[target] = MachineInsnPosition{};
  }

  // Checking pending signals only on back jumps guarantees that there are no
  // infinite loops without pending-signal checks.
  auto kind = target <= GetInsnAddr() ? PseudoJump::Kind::kJumpWithPendingSignalsCheck
                                      : PseudoJump::Kind::kJumpWithoutPendingSignalsCheck;

  Gen<PseudoJump>(target, kind);
}

void HeavyOptimizerFrontend::ExitGeneratedCode(GuestAddr target) {
  Gen<PseudoJump>(target, PseudoJump::Kind::kExitGeneratedCode);
}

void HeavyOptimizerFrontend::ExitRegionIndirect(Register target) {
  Gen<PseudoIndirectJump>(target);
}

void HeavyOptimizerFrontend::Undefined() {
  success_ = false;
  ExitGeneratedCode(GetInsnAddr());
  // We don't require the region to end here since control flow may jump around
  // the undefined instruction, so handle it as an unconditional branch.
  is_uncond_branch_ = true;
}

bool HeavyOptimizerFrontend::IsRegionEndReached() const {
  if (!is_uncond_branch_) {
    return false;
  }

  auto map_it = branch_targets_.find(GetInsnAddr());
  // If this instruction, which follows an unconditional branch, isn't reachable
  // by some other branch, it's a region end.
  return map_it == branch_targets_.end();
}

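// Links jumps whose targets were translated into the same region. For every basic block that ends
// with a PseudoJump to an address we have already translated, the target block is split at that
// instruction (unless the target is already at a block boundary) and the jump is rewritten into a
// direct branch by ReplaceJumpWithBranch().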
void HeavyOptimizerFrontend::ResolveJumps() {
  if (!config::kLinkJumpsWithinRegion) {
    return;
  }
  auto ir = builder_.ir();

  MachineBasicBlockList bb_list_copy(ir->bb_list());
  for (auto bb : bb_list_copy) {
    if (bb->is_recovery()) {
      // Recovery blocks must exit the region; do not try to resolve them into local branches.
      continue;
    }

    const MachineInsn* last_insn = bb->insn_list().back();
    if (last_insn->opcode() != kMachineOpPseudoJump) {
      continue;
    }

    auto* jump = static_cast<const PseudoJump*>(last_insn);
    if (jump->kind() == PseudoJump::Kind::kSyscall ||
        jump->kind() == PseudoJump::Kind::kExitGeneratedCode) {
      // Syscalls and generated code exits must always exit the region.
      continue;
    }

    GuestAddr target = jump->target();
    auto map_it = branch_targets_.find(target);
    // All PseudoJump insns must add their targets to branch_targets_.
    CHECK(map_it != branch_targets_.end());

    MachineInsnPosition pos = map_it->second;
    MachineBasicBlock* target_containing_bb = pos.first;
    if (!target_containing_bb) {
      // Branch target is not in the current region.
      continue;
    }

    CHECK(pos.second.has_value());
    auto target_insn_it = pos.second.value();
    MachineBasicBlock* target_bb;
    if (target_insn_it == target_containing_bb->insn_list().begin()) {
      // We don't need to split if target_insn_it is at the beginning of target_containing_bb.
      target_bb = target_containing_bb;
    } else {
      // target_bb is split from target_containing_bb.
      target_bb = ir->SplitBasicBlock(target_containing_bb, target_insn_it);
      UpdateBranchTargetsAfterSplit(target, target_containing_bb, target_bb);

      // Make sure target_bb is also considered for jump resolution. Otherwise we may leave code
      // referenced by it unlinked from the rest of the IR.
      bb_list_copy.push_back(target_bb);

      // If bb is equal to target_containing_bb, then the branch instruction at the end of bb
      // is moved to the new target_bb, so we replace the instruction at the end of the
      // target_bb instead of bb.
      if (bb == target_containing_bb) {
        bb = target_bb;
      }
    }

    ReplaceJumpWithBranch(bb, target_bb);
  }
}

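// Rewrites the trailing PseudoJump of bb into region-local control flow. A forward jump becomes a
// plain PseudoBranch to target_bb. A back jump keeps the pending-signals check: the emitted code
// compares ThreadState::pending_signals_status against kPendingSignalsPresent and either exits
// generated code or branches back to target_bb.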
void HeavyOptimizerFrontend::ReplaceJumpWithBranch(MachineBasicBlock* bb,
                                                   MachineBasicBlock* target_bb) {
  auto ir = builder_.ir();
  const auto* last_insn = bb->insn_list().back();
  CHECK_EQ(last_insn->opcode(), kMachineOpPseudoJump);
  auto* jump = static_cast<const PseudoJump*>(last_insn);
  GuestAddr target = jump->target();
  // Do not invalidate this iterator as it may be a target for another jump.
  // Instead overwrite the instruction.
  auto jump_it = std::prev(bb->insn_list().end());

  if (jump->kind() == PseudoJump::Kind::kJumpWithoutPendingSignalsCheck) {
    // Simple branch for forward jump.
    *jump_it = ir->NewInsn<PseudoBranch>(target_bb);
    ir->AddEdge(bb, target_bb);
  } else {
    CHECK(jump->kind() == PseudoJump::Kind::kJumpWithPendingSignalsCheck);
    // See EmitCheckSignalsAndMaybeReturn.
    auto* exit_bb = ir->NewBasicBlock();
    // Note that we intentionally don't mark exit_bb as recovery and therefore don't request its
    // reordering away from hot code spots. target_bb is a back branch and is unlikely to be a
    // fall-through jump for the current bb. At the same time exit_bb can be a fall-through jump
    // and benchmarks benefit from it.
    const size_t offset = offsetof(ThreadState, pending_signals_status);
    auto* cmpb = ir->NewInsn<x86_64::CmpbMemBaseDispImm>(
        x86_64::kMachineRegRBP, offset, kPendingSignalsPresent, GetFlagsRegister());
    *jump_it = cmpb;
    auto* cond_branch = ir->NewInsn<PseudoCondBranch>(
        x86_64::Assembler::Condition::kEqual, exit_bb, target_bb, GetFlagsRegister());
    bb->insn_list().push_back(cond_branch);

    builder_.StartBasicBlock(exit_bb);
    ExitGeneratedCode(target);

    ir->AddEdge(bb, exit_bb);
    ir->AddEdge(bb, target_bb);
  }
}

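// After SplitBasicBlock() the instructions starting at addr live in new_bb, so every
// branch_targets_ entry that still points into old_bb must be retargeted. Entries are visited in
// map order starting from addr; iteration stops at the first entry that no longer points into
// old_bb.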
void HeavyOptimizerFrontend::UpdateBranchTargetsAfterSplit(GuestAddr addr,
                                                           const MachineBasicBlock* old_bb,
                                                           MachineBasicBlock* new_bb) {
  auto map_it = branch_targets_.find(addr);
  CHECK(map_it != branch_targets_.end());
  while (map_it != branch_targets_.end() && map_it->second.first == old_bb) {
    map_it->second.first = new_bb;
    map_it++;
  }
}

Register HeavyOptimizerFrontend::GetReg(uint8_t reg) {
  CHECK_LT(reg, kNumGuestRegs);
  Register dst = AllocTempReg();
  builder_.GenGet(dst, GetThreadStateRegOffset(reg));
  return dst;
}

void HeavyOptimizerFrontend::SetReg(uint8_t reg, Register value) {
  CHECK_LT(reg, kNumGuestRegs);
  if (success()) {
    builder_.GenPut(GetThreadStateRegOffset(reg), value);
  }
}

FpRegister HeavyOptimizerFrontend::GetFpReg(uint8_t reg) {
  FpRegister result = AllocTempSimdReg();
  builder_.GenGetSimd<8>(result.machine_reg(), GetThreadStateFRegOffset(reg));
  return result;
}

void HeavyOptimizerFrontend::Nop() {}

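// Integer register-register ops. x86-64 ALU instructions are two-operand and destructive, so the
// common pattern below is:
//
//   Gen<PseudoCopy>(res, arg1, 8);                            // res = arg1
//   Gen<x86_64::AddqRegReg>(res, arg2, GetFlagsRegister());   // res += arg2
//
// The flags register returned by GetFlagsRegister() is passed to each flag-writing insn so the
// flags side effect is represented in the IR.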
Register HeavyOptimizerFrontend::Op(Decoder::OpOpcode opcode, Register arg1, Register arg2) {
  using OpOpcode = Decoder::OpOpcode;
  using Condition = x86_64::Assembler::Condition;
  auto res = AllocTempReg();
  switch (opcode) {
    case OpOpcode::kAdd:
      Gen<PseudoCopy>(res, arg1, 8);
      Gen<x86_64::AddqRegReg>(res, arg2, GetFlagsRegister());
      break;
    case OpOpcode::kSub:
      Gen<PseudoCopy>(res, arg1, 8);
      Gen<x86_64::SubqRegReg>(res, arg2, GetFlagsRegister());
      break;
    case OpOpcode::kAnd:
      Gen<PseudoCopy>(res, arg1, 8);
      Gen<x86_64::AndqRegReg>(res, arg2, GetFlagsRegister());
      break;
    case OpOpcode::kOr:
      Gen<PseudoCopy>(res, arg1, 8);
      Gen<x86_64::OrqRegReg>(res, arg2, GetFlagsRegister());
      break;
    case OpOpcode::kXor:
      Gen<PseudoCopy>(res, arg1, 8);
      Gen<x86_64::XorqRegReg>(res, arg2, GetFlagsRegister());
      break;
    case OpOpcode::kSll:
      Gen<PseudoCopy>(res, arg1, 8);
      Gen<x86_64::ShlqRegReg>(res, arg2, GetFlagsRegister());
      break;
    case OpOpcode::kSrl:
      Gen<PseudoCopy>(res, arg1, 8);
      Gen<x86_64::ShrqRegReg>(res, arg2, GetFlagsRegister());
      break;
    case OpOpcode::kSra:
      Gen<PseudoCopy>(res, arg1, 8);
      Gen<x86_64::SarqRegReg>(res, arg2, GetFlagsRegister());
      break;
    case OpOpcode::kSlt: {
      Gen<x86_64::CmpqRegReg>(arg1, arg2, GetFlagsRegister());
      auto temp = AllocTempReg();
      Gen<x86_64::SetccReg>(Condition::kLess, temp, GetFlagsRegister());
      Gen<x86_64::MovzxbqRegReg>(res, temp);
      break;
    }
    case OpOpcode::kSltu: {
      Gen<x86_64::CmpqRegReg>(arg1, arg2, GetFlagsRegister());
      auto temp = AllocTempReg();
      Gen<x86_64::SetccReg>(Condition::kBelow, temp, GetFlagsRegister());
      Gen<x86_64::MovzxbqRegReg>(res, temp);
      break;
    }
    case OpOpcode::kMul:
      Gen<PseudoCopy>(res, arg1, 8);
      Gen<x86_64::ImulqRegReg>(res, arg2, GetFlagsRegister());
      break;
    case OpOpcode::kMulh: {
      auto rax = AllocTempReg();
      auto rdx = AllocTempReg();
      Gen<PseudoCopy>(rax, arg1, 8);
      Gen<x86_64::ImulqRegRegReg>(rax, rdx, arg2, GetFlagsRegister());
      Gen<PseudoCopy>(res, rdx, 8);
    } break;
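    // MULHSU: high 64 bits of signed(arg1) * unsigned(arg2). There is no matching x86
    // instruction, so the unsigned high product is corrected. Since
    // signed(a) = a - 2^64 * sign(a), we have
    //   mulhsu(a, b) = mulhu(a, b) - sign(a) * b
    //                = mulhu(a, b) + (a >> 63) * b  // arithmetic shift yields 0 or -1
    // which is exactly the MulqRegRegReg / SarqRegImm / ImulqRegReg / AddqRegReg sequence below.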
    case OpOpcode::kMulhsu: {
      Gen<PseudoCopy>(res, arg1, 8);
      auto rax = AllocTempReg();
      auto rdx = AllocTempReg();
      Gen<PseudoCopy>(rax, arg2, 8);
      Gen<x86_64::MulqRegRegReg>(rax, rdx, res, GetFlagsRegister());
      Gen<x86_64::SarqRegImm>(res, 63, GetFlagsRegister());
      Gen<x86_64::ImulqRegReg>(res, arg2, GetFlagsRegister());
      Gen<x86_64::AddqRegReg>(res, rdx, GetFlagsRegister());
    } break;
    case OpOpcode::kMulhu: {
      auto rax = AllocTempReg();
      auto rdx = AllocTempReg();
      Gen<PseudoCopy>(rax, arg1, 8);
      Gen<x86_64::MulqRegRegReg>(rax, rdx, arg2, GetFlagsRegister());
      Gen<PseudoCopy>(res, rdx, 8);
    } break;
    case OpOpcode::kAndn:
      if (host_platform::kHasBMI) {
        Gen<x86_64::AndnqRegRegReg>(res, arg2, arg1, GetFlagsRegister());
      } else {
        Gen<PseudoCopy>(res, arg2, 8);
        Gen<x86_64::NotqReg>(res);
        Gen<x86_64::AndqRegReg>(res, arg1, GetFlagsRegister());
      }
      break;
    case OpOpcode::kOrn:
      Gen<PseudoCopy>(res, arg2, 8);
      Gen<x86_64::NotqReg>(res);
      Gen<x86_64::OrqRegReg>(res, arg1, GetFlagsRegister());
      break;
    case OpOpcode::kXnor:
      Gen<PseudoCopy>(res, arg2, 8);
      Gen<x86_64::XorqRegReg>(res, arg1, GetFlagsRegister());
      Gen<x86_64::NotqReg>(res);
      break;
    default:
      Undefined();
      return {};
  }

  return res;
}

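// 32-bit (W-suffixed) register-register ops: the operation is performed on the lower 32 bits and
// the result is then sign-extended to 64 bits, as RV64 requires for *W instructions.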
Register HeavyOptimizerFrontend::Op32(Decoder::Op32Opcode opcode, Register arg1, Register arg2) {
  using Op32Opcode = Decoder::Op32Opcode;
  auto res = AllocTempReg();
  auto unextended_res = res;
  switch (opcode) {
    case Op32Opcode::kAddw:
      Gen<PseudoCopy>(res, arg1, 4);
      Gen<x86_64::AddlRegReg>(res, arg2, GetFlagsRegister());
      break;
    case Op32Opcode::kSubw:
      Gen<PseudoCopy>(res, arg1, 4);
      Gen<x86_64::SublRegReg>(res, arg2, GetFlagsRegister());
      break;
    case Op32Opcode::kSllw:
    case Op32Opcode::kSrlw:
    case Op32Opcode::kSraw: {
      auto rcx = AllocTempReg();
      Gen<PseudoCopy>(res, arg1, 4);
      Gen<PseudoCopy>(rcx, arg2, 4);
      if (opcode == Op32Opcode::kSllw) {
        Gen<x86_64::ShllRegReg>(res, rcx, GetFlagsRegister());
      } else if (opcode == Op32Opcode::kSrlw) {
        Gen<x86_64::ShrlRegReg>(res, rcx, GetFlagsRegister());
      } else {
        Gen<x86_64::SarlRegReg>(res, rcx, GetFlagsRegister());
      }
    } break;
    case Op32Opcode::kMulw:
      Gen<PseudoCopy>(res, arg1, 4);
      Gen<x86_64::ImullRegReg>(res, arg2, GetFlagsRegister());
      break;
    default:
      Undefined();
      return {};
  }
  Gen<x86_64::MovsxlqRegReg>(res, unextended_res);
  return res;
}

Register HeavyOptimizerFrontend::OpImm(Decoder::OpImmOpcode opcode, Register arg, int16_t imm) {
  using OpImmOpcode = Decoder::OpImmOpcode;
  using Condition = x86_64::Assembler::Condition;
  auto res = AllocTempReg();
  switch (opcode) {
    case OpImmOpcode::kAddi:
      Gen<PseudoCopy>(res, arg, 8);
      Gen<x86_64::AddqRegImm>(res, imm, GetFlagsRegister());
      break;
    case OpImmOpcode::kSlti: {
      auto temp = AllocTempReg();
      Gen<x86_64::CmpqRegImm>(arg, imm, GetFlagsRegister());
      Gen<x86_64::SetccReg>(Condition::kLess, temp, GetFlagsRegister());
      Gen<x86_64::MovsxbqRegReg>(res, temp);
    } break;
    case OpImmOpcode::kSltiu: {
      auto temp = AllocTempReg();
      Gen<x86_64::CmpqRegImm>(arg, imm, GetFlagsRegister());
      Gen<x86_64::SetccReg>(Condition::kBelow, temp, GetFlagsRegister());
      Gen<x86_64::MovsxbqRegReg>(res, temp);
    } break;
    case OpImmOpcode::kXori:
      Gen<PseudoCopy>(res, arg, 8);
      Gen<x86_64::XorqRegImm>(res, imm, GetFlagsRegister());
      break;
    case OpImmOpcode::kOri:
      Gen<PseudoCopy>(res, arg, 8);
      Gen<x86_64::OrqRegImm>(res, imm, GetFlagsRegister());
      break;
    case OpImmOpcode::kAndi:
      Gen<PseudoCopy>(res, arg, 8);
      Gen<x86_64::AndqRegImm>(res, imm, GetFlagsRegister());
      break;
    default:
      Undefined();
      return {};
  }
  return res;
}

Register HeavyOptimizerFrontend::OpImm32(Decoder::OpImm32Opcode opcode, Register arg, int16_t imm) {
  auto res = AllocTempReg();
  switch (opcode) {
    case Decoder::OpImm32Opcode::kAddiw:
      Gen<PseudoCopy>(res, arg, 4);
      Gen<x86_64::AddlRegImm>(res, imm, GetFlagsRegister());
      Gen<x86_64::MovsxlqRegReg>(res, res);
      break;
    default:
      Undefined();
      return {};
  }
  return res;
}

Register HeavyOptimizerFrontend::Slli(Register arg, int8_t imm) {
  auto res = AllocTempReg();
  Gen<PseudoCopy>(res, arg, 8);
  Gen<x86_64::ShlqRegImm>(res, imm, GetFlagsRegister());
  return res;
}

Register HeavyOptimizerFrontend::Srli(Register arg, int8_t imm) {
  auto res = AllocTempReg();
  Gen<PseudoCopy>(res, arg, 8);
  Gen<x86_64::ShrqRegImm>(res, imm, GetFlagsRegister());
  return res;
}

Register HeavyOptimizerFrontend::Srai(Register arg, int8_t imm) {
  auto res = AllocTempReg();
  Gen<PseudoCopy>(res, arg, 8);
  Gen<x86_64::SarqRegImm>(res, imm, GetFlagsRegister());
  return res;
}

Register HeavyOptimizerFrontend::ShiftImm32(Decoder::ShiftImm32Opcode opcode,
                                            Register arg,
                                            uint16_t imm) {
  using ShiftImm32Opcode = Decoder::ShiftImm32Opcode;
  auto res = AllocTempReg();
  auto rcx = AllocTempReg();
  Gen<PseudoCopy>(res, arg, 4);
  Gen<x86_64::MovlRegImm>(rcx, imm);
  switch (opcode) {
    case ShiftImm32Opcode::kSlliw:
      Gen<x86_64::ShllRegReg>(res, rcx, GetFlagsRegister());
      break;
    case ShiftImm32Opcode::kSrliw:
      Gen<x86_64::ShrlRegReg>(res, rcx, GetFlagsRegister());
      break;
    case ShiftImm32Opcode::kSraiw:
      Gen<x86_64::SarlRegReg>(res, rcx, GetFlagsRegister());
      break;
    default:
      Undefined();
      break;
  }
  Gen<x86_64::MovsxlqRegReg>(res, res);
  return res;
}

Register HeavyOptimizerFrontend::Rori(Register arg, int8_t shamt) {
  auto res = AllocTempReg();
  Gen<PseudoCopy>(res, arg, 8);
  Gen<x86_64::RorqRegImm>(res, shamt, GetFlagsRegister());
  return res;
}

Register HeavyOptimizerFrontend::Roriw(Register arg, int8_t shamt) {
  auto res = AllocTempReg();
  Gen<PseudoCopy>(res, arg, 8);
  Gen<x86_64::RorlRegImm>(res, shamt, GetFlagsRegister());
  Gen<x86_64::MovsxlqRegReg>(res, res);
  return res;
}

Register HeavyOptimizerFrontend::Lui(int32_t imm) {
  auto res = AllocTempReg();
  Gen<x86_64::MovlRegImm>(res, imm);
  Gen<x86_64::MovsxlqRegReg>(res, res);
  return res;
}

Register HeavyOptimizerFrontend::Auipc(int32_t imm) {
  auto res = GetImm(GetInsnAddr());
  auto temp = AllocTempReg();
  Gen<x86_64::MovlRegImm>(temp, imm);
  Gen<x86_64::MovsxlqRegReg>(temp, temp);
  Gen<x86_64::AddqRegReg>(res, temp, GetFlagsRegister());
  return res;
}

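// Guest memory accesses. Store()/Load() emit the access itself and then attach a recovery basic
// block (see GenRecoveryBlockForLastInsn) so that a faulting access exits generated code at the
// current guest instruction.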
void HeavyOptimizerFrontend::Store(Decoder::MemoryDataOperandType operand_type,
                                   Register arg,
                                   int16_t offset,
                                   Register data) {
  int32_t sx_offset{offset};
  StoreWithoutRecovery(operand_type, arg, sx_offset, data);
  GenRecoveryBlockForLastInsn();
}

Register HeavyOptimizerFrontend::Load(Decoder::LoadOperandType operand_type,
                                      Register arg,
                                      int16_t offset) {
  int32_t sx_offset{offset};
  auto res = LoadWithoutRecovery(operand_type, arg, sx_offset);
  GenRecoveryBlockForLastInsn();
  return res;
}

void HeavyOptimizerFrontend::GenRecoveryBlockForLastInsn() {
  // TODO(b/311240558) Accurate Sigsegv?
  auto* ir = builder_.ir();
  auto* current_bb = builder_.bb();
  auto* continue_bb = ir->NewBasicBlock();
  auto* recovery_bb = ir->NewBasicBlock();
  ir->AddEdge(current_bb, continue_bb);
  ir->AddEdge(current_bb, recovery_bb);

  builder_.SetRecoveryPointAtLastInsn(recovery_bb);

  // Note that even though there are two bb successors, we only explicitly branch to
  // continue_bb, since the jump to recovery_bb is set up by the signal handler.
  Gen<PseudoBranch>(continue_bb);

  builder_.StartBasicBlock(recovery_bb);
  ExitGeneratedCode(GetInsnAddr());

  builder_.StartBasicBlock(continue_bb);
}

//
// Methods that are not part of SemanticsListener implementation.
//

void HeavyOptimizerFrontend::StartInsn() {
  if (is_uncond_branch_) {
    auto* ir = builder_.ir();
    builder_.StartBasicBlock(ir->NewBasicBlock());
  }

  is_uncond_branch_ = false;
  // The position stored in branch_targets_ points at the last insn emitted before this guest
  // instruction. We advance these iterators by one step in Finalize(), so that each entry points
  // at the sub-list of instructions starting from the first one generated for its guest address.

  // If the basic block is empty before generating the insn, an empty optional is stored. We
  // resolve it to the first insn of the basic block in Finalize().
  branch_targets_[GetInsnAddr()] = builder_.GetMachineInsnPosition();
}

void HeavyOptimizerFrontend::Finalize(GuestAddr stop_pc) {
  // Make sure the last basic block isn't empty before fixing the iterators in
  // branch_targets_.
  if (builder_.bb()->insn_list().empty() ||
      !builder_.ir()->IsControlTransfer(builder_.bb()->insn_list().back())) {
    GenJump(stop_pc);
  }

  // This loop advances the iterators in branch_targets_ by one, because StartInsn() saved the
  // iterator to the last insn emitted before the first insn of each guest address. If an entry
  // holds an empty optional, the basic block was empty at that point, so we resolve it to the
  // first insn in the basic block.
  for (auto& [unused_address, insn_pos] : branch_targets_) {
    auto& [bb, insn_it] = insn_pos;
    if (!bb) {
      // Branch target is not in the current region.
      continue;
    }

    if (insn_it.has_value()) {
      insn_it.value()++;
    } else {
      // Make sure bb isn't still empty.
      CHECK(!bb->insn_list().empty());
      insn_it = bb->insn_list().begin();
    }
  }

  ResolveJumps();
}

Register HeavyOptimizerFrontend::LoadWithoutRecovery(Decoder::LoadOperandType operand_type,
                                                     Register base,
                                                     int32_t disp) {
  auto res = AllocTempReg();
  switch (operand_type) {
    case Decoder::LoadOperandType::k8bitUnsigned:
      Gen<x86_64::MovzxblRegMemBaseDisp>(res, base, disp);
      break;
    case Decoder::LoadOperandType::k16bitUnsigned:
      Gen<x86_64::MovzxwlRegMemBaseDisp>(res, base, disp);
      break;
    case Decoder::LoadOperandType::k32bitUnsigned:
      Gen<x86_64::MovlRegMemBaseDisp>(res, base, disp);
      break;
    case Decoder::LoadOperandType::k64bit:
      Gen<x86_64::MovqRegMemBaseDisp>(res, base, disp);
      break;
    case Decoder::LoadOperandType::k8bitSigned:
      Gen<x86_64::MovsxbqRegMemBaseDisp>(res, base, disp);
      break;
    case Decoder::LoadOperandType::k16bitSigned:
      Gen<x86_64::MovsxwqRegMemBaseDisp>(res, base, disp);
      break;
    case Decoder::LoadOperandType::k32bitSigned:
      Gen<x86_64::MovsxlqRegMemBaseDisp>(res, base, disp);
      break;
    default:
      Undefined();
      return {};
  }

  return res;
}

Register HeavyOptimizerFrontend::LoadWithoutRecovery(Decoder::LoadOperandType operand_type,
                                                     Register base,
                                                     Register index,
                                                     int32_t disp) {
  auto res = AllocTempReg();
  switch (operand_type) {
    case Decoder::LoadOperandType::k8bitUnsigned:
      Gen<x86_64::MovzxblRegMemBaseIndexDisp>(
          res, base, index, x86_64::MachineMemOperandScale::kOne, disp);
      break;
    case Decoder::LoadOperandType::k16bitUnsigned:
      Gen<x86_64::MovzxwlRegMemBaseIndexDisp>(
          res, base, index, x86_64::MachineMemOperandScale::kOne, disp);
      break;
    case Decoder::LoadOperandType::k32bitUnsigned:
      Gen<x86_64::MovlRegMemBaseIndexDisp>(
          res, base, index, x86_64::MachineMemOperandScale::kOne, disp);
      break;
    case Decoder::LoadOperandType::k64bit:
      Gen<x86_64::MovqRegMemBaseIndexDisp>(
          res, base, index, x86_64::MachineMemOperandScale::kOne, disp);
      break;
    case Decoder::LoadOperandType::k8bitSigned:
      Gen<x86_64::MovsxbqRegMemBaseIndexDisp>(
          res, base, index, x86_64::MachineMemOperandScale::kOne, disp);
      break;
    case Decoder::LoadOperandType::k16bitSigned:
      Gen<x86_64::MovsxwqRegMemBaseIndexDisp>(
          res, base, index, x86_64::MachineMemOperandScale::kOne, disp);
      break;
    case Decoder::LoadOperandType::k32bitSigned:
      Gen<x86_64::MovsxlqRegMemBaseIndexDisp>(
          res, base, index, x86_64::MachineMemOperandScale::kOne, disp);
      break;
    default:
      Undefined();
      return {};
  }
  return res;
}

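// CSR updates. These helpers compute the new value to be written into the CSR from the old value
// (csr) and the source operand, mirroring the RISC-V CSRRS/CSRRC set-bits/clear-bits semantics.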
Register HeavyOptimizerFrontend::UpdateCsr(Decoder::CsrOpcode opcode, Register arg, Register csr) {
  Register res = AllocTempReg();
  switch (opcode) {
    case Decoder::CsrOpcode::kCsrrs:
      Gen<PseudoCopy>(res, arg, 8);
      Gen<x86_64::OrqRegReg>(res, csr, GetFlagsRegister());
      break;
    case Decoder::CsrOpcode::kCsrrc:
      if (host_platform::kHasBMI) {
        Gen<x86_64::AndnqRegRegReg>(res, arg, csr, GetFlagsRegister());
      } else {
        Gen<PseudoCopy>(res, arg, 8);
        Gen<x86_64::NotqReg>(res);
        Gen<x86_64::AndqRegReg>(res, csr, GetFlagsRegister());
      }
      break;
    default:
      Undefined();
      return {};
  }
  return res;
}

Register HeavyOptimizerFrontend::UpdateCsr(Decoder::CsrImmOpcode opcode,
                                           uint8_t imm,
                                           Register csr) {
  Register res = AllocTempReg();
  switch (opcode) {
    case Decoder::CsrImmOpcode::kCsrrwi:
      Gen<x86_64::MovlRegImm>(res, imm);
      break;
    case Decoder::CsrImmOpcode::kCsrrsi:
      Gen<x86_64::MovlRegImm>(res, imm);
      Gen<x86_64::OrqRegReg>(res, csr, GetFlagsRegister());
      break;
    case Decoder::CsrImmOpcode::kCsrrci:
      Gen<x86_64::MovqRegImm>(res, static_cast<int8_t>(~imm));
      Gen<x86_64::AndqRegReg>(res, csr, GetFlagsRegister());
      break;
    default:
      Undefined();
      return {};
  }
  return res;
}

void HeavyOptimizerFrontend::StoreWithoutRecovery(Decoder::MemoryDataOperandType operand_type,
                                                  Register base,
                                                  int32_t disp,
                                                  Register data) {
  switch (operand_type) {
    case Decoder::MemoryDataOperandType::k8bit:
      Gen<x86_64::MovbMemBaseDispReg>(base, disp, data);
      break;
    case Decoder::MemoryDataOperandType::k16bit:
      Gen<x86_64::MovwMemBaseDispReg>(base, disp, data);
      break;
    case Decoder::MemoryDataOperandType::k32bit:
      Gen<x86_64::MovlMemBaseDispReg>(base, disp, data);
      break;
    case Decoder::MemoryDataOperandType::k64bit:
      Gen<x86_64::MovqMemBaseDispReg>(base, disp, data);
      break;
    default:
      return Undefined();
  }
}

void HeavyOptimizerFrontend::StoreWithoutRecovery(Decoder::MemoryDataOperandType operand_type,
                                                  Register base,
                                                  Register index,
                                                  int32_t disp,
                                                  Register data) {
  switch (operand_type) {
    case Decoder::MemoryDataOperandType::k8bit:
      Gen<x86_64::MovbMemBaseIndexDispReg>(
          base, index, x86_64::MachineMemOperandScale::kOne, disp, data);
      break;
    case Decoder::MemoryDataOperandType::k16bit:
      Gen<x86_64::MovwMemBaseIndexDispReg>(
          base, index, x86_64::MachineMemOperandScale::kOne, disp, data);
      break;
    case Decoder::MemoryDataOperandType::k32bit:
      Gen<x86_64::MovlMemBaseIndexDispReg>(
          base, index, x86_64::MachineMemOperandScale::kOne, disp, data);
      break;
    case Decoder::MemoryDataOperandType::k64bit:
      Gen<x86_64::MovqMemBaseIndexDispReg>(
          base, index, x86_64::MachineMemOperandScale::kOne, disp, data);
      break;
    default:
      return Undefined();
  }
}

// Ordering that affects I/O devices is not relevant to user-space code, so we simply ignore the
// bits related to device I/O.
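// The resulting mapping is: read+write fence -> MFENCE, read-only fence -> LFENCE, write-only
// fence -> SFENCE, and no fence instruction at all when neither bit is set.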
void HeavyOptimizerFrontend::Fence(Decoder::FenceOpcode /* opcode */,
                                   Register /* src */,
                                   bool sw,
                                   bool sr,
                                   bool /* so */,
                                   bool /* si */,
                                   bool pw,
                                   bool pr,
                                   bool /* po */,
                                   bool /* pi */) {
  // Two types of fences (total store ordering fence and normal fence) are supposed to be
  // processed differently, but only for the “read_fence && write_fence” case (otherwise the total
  // store ordering fence becomes a normal fence for “forward compatibility”). Yet because x86
  // doesn't distinguish between these two types of fences, and since we are supposed to map all
  // not-yet-defined fences to a normal fence (again, for “forward compatibility”), it's OK to
  // just ignore the opcode field.
  bool read_fence = sr | pr;
  bool write_fence = sw | pw;
  if (read_fence) {
    if (write_fence) {
      Gen<x86_64::Mfence>();
    } else {
      Gen<x86_64::Lfence>();
    }
  } else if (write_fence) {
    Gen<x86_64::Sfence>();
  }
}

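// Load-reserved (LR) support: remember the reserved (aligned) address and the value read from it
// in the guest CPU state, and register this CPU as the owner of the memory region via
// MemoryRegionReservation::SetOwner so a later store-conditional can validate the reservation.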
void HeavyOptimizerFrontend::MemoryRegionReservationLoad(Register aligned_addr) {
  // Store aligned_addr in CPUState.
  int32_t address_offset = GetThreadStateReservationAddressOffset();
  Gen<x86_64::MovqMemBaseDispReg>(x86_64::kMachineRegRBP, address_offset, aligned_addr);

  // MemoryRegionReservation::SetOwner(aligned_addr, &(state->cpu)).
  builder_.GenCallImm(bit_cast<uint64_t>(&MemoryRegionReservation::SetOwner),
                      GetFlagsRegister(),
                      std::array<x86_64::CallImm::Arg, 2>{{
                          {aligned_addr, x86_64::CallImm::kIntRegType},
                          {x86_64::kMachineRegRBP, x86_64::CallImm::kIntRegType},
                      }});

  // Load reservation value and store it in CPUState.
  auto reservation = AllocTempReg();
  Gen<x86_64::MovqRegMemBaseDisp>(reservation, aligned_addr, 0);
  int32_t value_offset = GetThreadStateReservationValueOffset();
  Gen<x86_64::MovqMemBaseDispReg>(x86_64::kMachineRegRBP, value_offset, reservation);
}

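// Store-conditional (SC) support: clear the reservation, check that the reserved address matches,
// then attempt to publish the new value with a locked compare-exchange (see
// MemoryRegionReservationSwapWithLockedOwner). The returned register holds 0 on success and 1 on
// failure, matching the SC result convention.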
Register HeavyOptimizerFrontend::MemoryRegionReservationExchange(Register aligned_addr,
                                                                 Register curr_reservation_value) {
  auto* ir = builder_.ir();
  auto* cur_bb = builder_.bb();
  auto* addr_match_bb = ir->NewBasicBlock();
  auto* failure_bb = ir->NewBasicBlock();
  auto* continue_bb = ir->NewBasicBlock();
  ir->AddEdge(cur_bb, addr_match_bb);
  ir->AddEdge(cur_bb, failure_bb);
  ir->AddEdge(failure_bb, continue_bb);
  Register result = AllocTempReg();

  // MemoryRegionReservation::Clear.
  Register stored_aligned_addr = AllocTempReg();
  int32_t address_offset = GetThreadStateReservationAddressOffset();
  Gen<x86_64::MovqRegMemBaseDisp>(stored_aligned_addr, x86_64::kMachineRegRBP, address_offset);
  Gen<x86_64::MovqMemBaseDispImm>(x86_64::kMachineRegRBP, address_offset, kNullGuestAddr);
  // Compare aligned_addr to the one in CPUState.
  Gen<x86_64::CmpqRegReg>(stored_aligned_addr, aligned_addr, GetFlagsRegister());
  Gen<PseudoCondBranch>(
      x86_64::Assembler::Condition::kNotEqual, failure_bb, addr_match_bb, GetFlagsRegister());

  builder_.StartBasicBlock(addr_match_bb);
  // Load the new reservation value into an integer register where CmpXchgq expects it.
  Register new_reservation_value = AllocTempReg();
  int32_t value_offset = GetThreadStateReservationValueOffset();
  Gen<x86_64::MovqRegMemBaseDisp>(new_reservation_value, x86_64::kMachineRegRBP, value_offset);

  MemoryRegionReservationSwapWithLockedOwner(
      aligned_addr, curr_reservation_value, new_reservation_value, failure_bb);

  ir->AddEdge(builder_.bb(), continue_bb);
  // Pseudo-def for the use-def operand of XOR to make sure the data flow stays consistent.
  Gen<PseudoDefReg>(result);
  Gen<x86_64::XorqRegReg>(result, result, GetFlagsRegister());
  Gen<PseudoBranch>(continue_bb);

  builder_.StartBasicBlock(failure_bb);
  Gen<x86_64::MovqRegImm>(result, 1);
  Gen<PseudoBranch>(continue_bb);

  builder_.StartBasicBlock(continue_bb);

  return result;
}

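// Performs the actual swap while holding the region lock: MemoryRegionReservation::TryLock
// attempts to take the owner lock (a null result is treated as a failed lock), LOCK CMPXCHG
// publishes the new value only if memory still contains curr_reservation_value, and the lock
// entry is then released. Any failure branches to failure_bb.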
void HeavyOptimizerFrontend::MemoryRegionReservationSwapWithLockedOwner(
    Register aligned_addr,
    Register curr_reservation_value,
    Register new_reservation_value,
    MachineBasicBlock* failure_bb) {
  auto* ir = builder_.ir();
  auto* lock_success_bb = ir->NewBasicBlock();
  auto* swap_success_bb = ir->NewBasicBlock();
  ir->AddEdge(builder_.bb(), lock_success_bb);
  ir->AddEdge(builder_.bb(), failure_bb);
  ir->AddEdge(lock_success_bb, swap_success_bb);
  ir->AddEdge(lock_success_bb, failure_bb);

  // lock_entry = MemoryRegionReservation::TryLock(aligned_addr, &(state->cpu)).
  auto* call = builder_.GenCallImm(bit_cast<uint64_t>(&MemoryRegionReservation::TryLock),
                                   GetFlagsRegister(),
                                   std::array<x86_64::CallImm::Arg, 2>{{
                                       {aligned_addr, x86_64::CallImm::kIntRegType},
                                       {x86_64::kMachineRegRBP, x86_64::CallImm::kIntRegType},
                                   }});
  Register lock_entry = AllocTempReg();
  // Limit life-time of a narrow reg-class call result.
  Gen<PseudoCopy>(lock_entry, call->IntResultAt(0), 8);
  Gen<x86_64::TestqRegReg>(lock_entry, lock_entry, GetFlagsRegister());
  Gen<PseudoCondBranch>(
      x86_64::Assembler::Condition::kZero, failure_bb, lock_success_bb, GetFlagsRegister());

  builder_.StartBasicBlock(lock_success_bb);
  auto rax = AllocTempReg();
  Gen<PseudoCopy>(rax, curr_reservation_value, 8);
  Gen<x86_64::LockCmpXchgqRegMemBaseDispReg>(
      rax, aligned_addr, 0, new_reservation_value, GetFlagsRegister());

  // MemoryRegionReservation::Unlock(lock_entry).
  Gen<x86_64::MovqMemBaseDispImm>(lock_entry, 0, 0);
  // Zero-flag is set if CmpXchg is successful.
  Gen<PseudoCondBranch>(
      x86_64::Assembler::Condition::kNotZero, failure_bb, swap_success_bb, GetFlagsRegister());

  builder_.StartBasicBlock(swap_success_bb);
}

}  // namespace berberis