1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef BERBERIS_HEAVY_OPTIMIZER_RISCV64_INLINE_INTRINSIC_H_
18 #define BERBERIS_HEAVY_OPTIMIZER_RISCV64_INLINE_INTRINSIC_H_
19 
20 #include <cfenv>
21 #include <cstdint>
22 #include <tuple>
23 #include <type_traits>
24 #include <utility>
25 #include <variant>
26 
27 #include "berberis/assembler/x86_64.h"
28 #include "berberis/backend/common/machine_ir.h"
29 #include "berberis/backend/x86_64/machine_insn_intrinsics.h"
30 #include "berberis/backend/x86_64/machine_ir.h"
31 #include "berberis/backend/x86_64/machine_ir_builder.h"
32 #include "berberis/base/checks.h"
33 #include "berberis/base/config.h"
34 #include "berberis/base/dependent_false.h"
35 #include "berberis/intrinsics/common_to_x86/intrinsics_bindings.h"
36 #include "berberis/intrinsics/intrinsics.h"
37 #include "berberis/intrinsics/intrinsics_args.h"
38 #include "berberis/intrinsics/intrinsics_process_bindings.h"
39 #include "berberis/intrinsics/macro_assembler.h"
40 #include "berberis/runtime_primitives/platform.h"
41 
42 #include "simd_register.h"
43 
44 namespace berberis {
45 
// Forward declaration; the definition appears further down in this header. It is declared
// here because InlineIntrinsic::TryInlineWithHostRounding below calls it before that point.
46 template <auto kFunction, typename ResType, typename FlagRegister, typename... ArgType>
47 bool TryInlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder* builder,
48                                          ResType result,
49                                          FlagRegister flag_register,
50                                          ArgType... args);
51 
52 template <auto kFunc>
53 class InlineIntrinsic {
54  public:
55   template <typename ResType, typename FlagRegister, typename... ArgType>
TryInlineWithHostRounding(x86_64::MachineIRBuilder * builder,ResType result,FlagRegister flag_register,ArgType...args)56   static bool TryInlineWithHostRounding(x86_64::MachineIRBuilder* builder,
57                                         ResType result,
58                                         FlagRegister flag_register,
59                                         ArgType... args) {
60     std::tuple args_tuple = std::make_tuple(args...);
61     if constexpr (IsTagEq<&intrinsics::FMul<intrinsics::Float64>>()) {
62       auto [rm, frm, src1, src2] = args_tuple;
63       if (rm != FPFlags::DYN) {
64         return false;
65       }
66       return TryInlineIntrinsicForHeavyOptimizer<
67           &intrinsics::FMulHostRounding<intrinsics::Float64>>(
68           builder, result, flag_register, src1, src2);
69     } else if constexpr (IsTagEq<&intrinsics::FMul<intrinsics::Float32>>()) {
70       auto [rm, frm, src1, src2] = args_tuple;
71       if (rm != FPFlags::DYN) {
72         return false;
73       }
74       return TryInlineIntrinsicForHeavyOptimizer<
75           &intrinsics::FMulHostRounding<intrinsics::Float32>>(
76           builder, result, flag_register, src1, src2);
77     } else if constexpr (IsTagEq<&intrinsics::FAdd<intrinsics::Float64>>()) {
78       auto [rm, frm, src1, src2] = args_tuple;
79       if (rm != FPFlags::DYN) {
80         return false;
81       }
82       return TryInlineIntrinsicForHeavyOptimizer<
83           &intrinsics::FAddHostRounding<intrinsics::Float64>>(
84           builder, result, flag_register, src1, src2);
85     } else if constexpr (IsTagEq<&intrinsics::FAdd<intrinsics::Float32>>()) {
86       auto [rm, frm, src1, src2] = args_tuple;
87       if (rm != FPFlags::DYN) {
88         return false;
89       }
90       return TryInlineIntrinsicForHeavyOptimizer<
91           &intrinsics::FAddHostRounding<intrinsics::Float32>>(
92           builder, result, flag_register, src1, src2);
93     } else if constexpr (IsTagEq<&intrinsics::FSub<intrinsics::Float64>>()) {
94       auto [rm, frm, src1, src2] = args_tuple;
95       if (rm != FPFlags::DYN) {
96         return false;
97       }
98       return TryInlineIntrinsicForHeavyOptimizer<
99           &intrinsics::FSubHostRounding<intrinsics::Float64>>(
100           builder, result, flag_register, src1, src2);
101     } else if constexpr (IsTagEq<&intrinsics::FSub<intrinsics::Float32>>()) {
102       auto [rm, frm, src1, src2] = args_tuple;
103       if (rm != FPFlags::DYN) {
104         return false;
105       }
106       return TryInlineIntrinsicForHeavyOptimizer<
107           &intrinsics::FSubHostRounding<intrinsics::Float32>>(
108           builder, result, flag_register, src1, src2);
109     } else if constexpr (IsTagEq<&intrinsics::FDiv<intrinsics::Float64>>()) {
110       auto [rm, frm, src1, src2] = args_tuple;
111       if (rm != FPFlags::DYN) {
112         return false;
113       }
114       return TryInlineIntrinsicForHeavyOptimizer<
115           &intrinsics::FDivHostRounding<intrinsics::Float64>>(
116           builder, result, flag_register, src1, src2);
117     } else if constexpr (IsTagEq<&intrinsics::FDiv<intrinsics::Float32>>()) {
118       auto [rm, frm, src1, src2] = args_tuple;
119       if (rm != FPFlags::DYN) {
120         return false;
121       }
122       return TryInlineIntrinsicForHeavyOptimizer<
123           &intrinsics::FDivHostRounding<intrinsics::Float32>>(
124           builder, result, flag_register, src1, src2);
125     } else if constexpr (IsTagEq<&intrinsics::FCvtFloatToInteger<int64_t, intrinsics::Float64>>()) {
126       auto [rm, frm, src] = args_tuple;
127       if (rm != FPFlags::DYN) {
128         return false;
129       }
130       return TryInlineIntrinsicForHeavyOptimizer<
131           &intrinsics::FCvtFloatToIntegerHostRounding<int64_t, intrinsics::Float64>>(
132           builder, result, flag_register, src);
133     } else if constexpr (IsTagEq<&intrinsics::FCvtFloatToInteger<int64_t, intrinsics::Float32>>()) {
134       auto [rm, frm, src] = args_tuple;
135       if (rm != FPFlags::DYN) {
136         return false;
137       }
138       return TryInlineIntrinsicForHeavyOptimizer<
139           &intrinsics::FCvtFloatToIntegerHostRounding<int64_t, intrinsics::Float32>>(
140           builder, result, flag_register, src);
141     } else if constexpr (IsTagEq<&intrinsics::FCvtFloatToInteger<int32_t, intrinsics::Float64>>()) {
142       auto [rm, frm, src] = args_tuple;
143       if (rm != FPFlags::DYN) {
144         return false;
145       }
146       return TryInlineIntrinsicForHeavyOptimizer<
147           &intrinsics::FCvtFloatToIntegerHostRounding<int32_t, intrinsics::Float64>>(
148           builder, result, flag_register, src);
149     } else if constexpr (IsTagEq<&intrinsics::FCvtFloatToInteger<int32_t, intrinsics::Float32>>()) {
150       auto [rm, frm, src] = args_tuple;
151       if (rm != FPFlags::DYN) {
152         return false;
153       }
154       return TryInlineIntrinsicForHeavyOptimizer<
155           &intrinsics::FCvtFloatToIntegerHostRounding<int32_t, intrinsics::Float32>>(
156           builder, result, flag_register, src);
157     }
158     return false;
159   }
160 
161  private:
162   // Comparison of pointers which point to different functions is generally not a
163   // constexpr since such functions can be merged in object code (comparing
164   // pointers to the same function is constexpr). This helper compares them using
165   // templates explicitly telling that we are not worried about such subtleties here.
166   template <auto kFunction>
167   class FunctionCompareTag;
168 
169   // Note, if we define it as a variable clang doesn't consider it a constexpr in TryInline funcs.
170   template <auto kOtherFunction>
IsTagEq()171   static constexpr bool IsTagEq() {
172     return std::is_same_v<FunctionCompareTag<kFunc>, FunctionCompareTag<kOtherFunction>>;
173   }
174 };
175 
176 template <typename DestRegClass, typename SrcRegClass>
Mov(x86_64::MachineIRBuilder * builder,MachineReg dest,MachineReg src)177 void Mov(x86_64::MachineIRBuilder* builder, MachineReg dest, MachineReg src) {
178   using DestType = typename DestRegClass::Type;
179   using SrcType = typename SrcRegClass::Type;
180   constexpr const auto src_reg_class = SrcRegClass::template kRegClass<x86_64::MachineInsnX86_64>;
181   if constexpr (std::is_integral_v<DestType>) {
182     if constexpr (std::is_integral_v<SrcType>) {
183       builder->Gen<PseudoCopy>(dest, src, src_reg_class.RegSize());
184     } else if constexpr (SrcRegClass::kAsRegister == 'x') {
185       if constexpr (src_reg_class.RegSize() == 4) {
186         if (host_platform::kHasAVX) {
187           builder->Gen<x86_64::VmovdRegXReg>(dest, src);
188         } else {
189           builder->Gen<x86_64::MovdRegXReg>(dest, src);
190         }
191       } else {
192         static_assert(src_reg_class.RegSize() >= 8);
193         if (host_platform::kHasAVX) {
194           builder->Gen<x86_64::VmovqRegXReg>(dest, src);
195         } else {
196           builder->Gen<x86_64::MovqRegXReg>(dest, src);
197         }
198       }
199     } else {
200       static_assert(kDependentTypeFalse<std::tuple<DestRegClass, SrcRegClass>>);
201     }
202   } else if (DestRegClass::kAsRegister == 'x') {
203     if constexpr (src_reg_class.RegSize() == 4) {
204       if constexpr (std::is_integral_v<SrcType>) {
205         if (host_platform::kHasAVX) {
206           builder->Gen<x86_64::VmovdXRegReg>(dest, src);
207         } else {
208           builder->Gen<x86_64::MovdXRegReg>(dest, src);
209         }
210       } else if constexpr (SrcRegClass::kAsRegister == 'x') {
211         builder->Gen<PseudoCopy>(dest, src, 16);
212       } else {
213         static_assert(kDependentTypeFalse<std::tuple<DestRegClass, SrcRegClass>>);
214       }
215     } else {
216       static_assert(src_reg_class.RegSize() >= 8);
217       if constexpr (std::is_integral_v<SrcType>) {
218         if (host_platform::kHasAVX) {
219           builder->Gen<x86_64::VmovqXRegReg>(dest, src);
220         } else {
221           builder->Gen<x86_64::MovqXRegReg>(dest, src);
222         }
223       } else if constexpr (SrcRegClass::kAsRegister == 'x') {
224         builder->Gen<PseudoCopy>(dest, src, 16);
225       } else {
226         static_assert(kDependentTypeFalse<std::tuple<DestRegClass, SrcRegClass>>);
227       }
228     }
229   }
230 }
231 
232 template <typename DestRegClass, typename SrcReg>
MovFromInput(x86_64::MachineIRBuilder * builder,MachineReg dest,SrcReg src)233 void MovFromInput(x86_64::MachineIRBuilder* builder, MachineReg dest, SrcReg src) {
234   if constexpr (std::is_same_v<SrcReg, SimdReg>) {
235     Mov<DestRegClass, intrinsics::bindings::XmmReg>(builder, dest, src.machine_reg());
236   } else {
237     Mov<DestRegClass, intrinsics::bindings::GeneralReg64>(builder, dest, src);
238   }
239 }
240 template <typename SrcRegClass, typename DestReg>
MovToResult(x86_64::MachineIRBuilder * builder,DestReg dest,MachineReg src)241 void MovToResult(x86_64::MachineIRBuilder* builder, DestReg dest, MachineReg src) {
242   if constexpr (std::is_same_v<DestReg, SimdReg>) {
243     Mov<intrinsics::bindings::XmmReg, SrcRegClass>(builder, dest.machine_reg(), src);
244   } else {
245     Mov<intrinsics::bindings::GeneralReg64, SrcRegClass>(builder, dest, src);
246   }
247 }
248 
249 template <auto kFunction, typename ResType, typename FlagRegister, typename... ArgType>
250 class TryBindingBasedInlineIntrinsicForHeavyOptimizer {
251   template <auto kFunctionForFriend,
252             typename ResTypeForFriend,
253             typename FlagRegisterForFriend,
254             typename... ArgTypeForFriend>
255   friend bool TryInlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder* builder,
256                                                   ResTypeForFriend result,
257                                                   FlagRegisterForFriend flag_register,
258                                                   ArgTypeForFriend... args);
259   template <auto kFunctionForFriend, typename FlagRegisterForFriend, typename... ArgTypeForFriend>
260   friend bool TryInlineIntrinsicForHeavyOptimizerVoid(x86_64::MachineIRBuilder* builder,
261                                                       FlagRegisterForFriend flag_register,
262                                                       ArgTypeForFriend... args);
263 
264   template <auto kFunc,
265             typename Assembler_common_x86,
266             typename Assembler_x86_64,
267             typename MacroAssembler,
268             typename Result,
269             typename Callback,
270             typename... Args>
271   friend Result intrinsics::bindings::ProcessBindings(Callback callback,
272                                                       Result def_result,
273                                                       Args&&... args);
274 
275   template <
276       auto kIntrinsicTemplateName,
277       auto kMacroInstructionTemplateName,
278       auto kMnemo,
279       typename GetOpcode,
280       intrinsics::bindings::CPUIDRestriction kCPUIDRestrictionTemplateValue,
281       intrinsics::bindings::PreciseNanOperationsHandling kPreciseNanOperationsHandlingTemplateValue,
282       bool kSideEffectsTemplateValue,
283       typename... Types>
284   friend class intrinsics::bindings::AsmCallInfo;
285 
286   TryBindingBasedInlineIntrinsicForHeavyOptimizer() = delete;
287   TryBindingBasedInlineIntrinsicForHeavyOptimizer(
288       const TryBindingBasedInlineIntrinsicForHeavyOptimizer&) = delete;
289   TryBindingBasedInlineIntrinsicForHeavyOptimizer(
290       TryBindingBasedInlineIntrinsicForHeavyOptimizer&&) = delete;
291   TryBindingBasedInlineIntrinsicForHeavyOptimizer& operator=(
292       const TryBindingBasedInlineIntrinsicForHeavyOptimizer&) = delete;
293   TryBindingBasedInlineIntrinsicForHeavyOptimizer& operator=(
294       TryBindingBasedInlineIntrinsicForHeavyOptimizer&&) = delete;
295 
TryBindingBasedInlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder * builder,ResType result,FlagRegister flag_register,ArgType...args)296   TryBindingBasedInlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder* builder,
297                                                   ResType result,
298                                                   FlagRegister flag_register,
299                                                   ArgType... args)
300       : builder_(builder),
301         result_{result},
302         xmm_result_reg_{},
303         flag_register_{flag_register},
304         input_args_(std::tuple{args...}),
305         success_(
306             intrinsics::bindings::ProcessBindings<kFunction,
307                                                   AssemblerX86<x86_64::Assembler>,
308                                                   x86_64::Assembler,
309                                                   std::tuple<MacroAssembler<x86_64::Assembler>>,
310                                                   bool,
311                                                   TryBindingBasedInlineIntrinsicForHeavyOptimizer&>(
312                 *this,
313                 false)) {}
314 
315   operator bool() { return success_; }
316 
317   // TODO(b/232598137) The MachineIR bindings for some macros can't be instantiated yet. This should
318   // be removed once they're supported.
319   template <typename AsmCallInfo,
320             std::enable_if_t<AsmCallInfo::template kOpcode<MachineOpcode> ==
321                                  MachineOpcode::kMachineOpUndefined,
322                              bool> = true>
operator()323   std::optional<bool> /*ProcessBindingsClient*/ operator()(AsmCallInfo /* asm_call_info */) {
324     return false;
325   }
326 
327   template <typename AsmCallInfo,
328             std::enable_if_t<AsmCallInfo::template kOpcode<MachineOpcode> !=
329                                  MachineOpcode::kMachineOpUndefined,
330                              bool> = true>
operator()331   std::optional<bool> /*ProcessBindingsClient*/ operator()(AsmCallInfo asm_call_info) {
332     static_assert(std::is_same_v<decltype(kFunction), typename AsmCallInfo::IntrinsicType>);
333     if constexpr (AsmCallInfo::kPreciseNanOperationsHandling !=
334                   intrinsics::bindings::kNoNansOperation) {
335       return false;
336     }
337 
338     if constexpr (AsmCallInfo::kCPUIDRestriction == intrinsics::bindings::kHasAVX) {
339       if (!host_platform::kHasAVX) {
340         return false;
341       }
342     } else if constexpr (AsmCallInfo::kCPUIDRestriction == intrinsics::bindings::kHasBMI) {
343       if (!host_platform::kHasBMI) {
344         return false;
345       }
346     } else if constexpr (AsmCallInfo::kCPUIDRestriction == intrinsics::bindings::kHasLZCNT) {
347       if (!host_platform::kHasLZCNT) {
348         return false;
349       }
350     } else if constexpr (AsmCallInfo::kCPUIDRestriction == intrinsics::bindings::kHasPOPCNT) {
351       if (!host_platform::kHasPOPCNT) {
352         return false;
353       }
354     } else if constexpr (AsmCallInfo::kCPUIDRestriction ==
355                          intrinsics::bindings::kNoCPUIDRestriction) {
356       // No restrictions. Do nothing.
357     } else {
358       static_assert(berberis::kDependentValueFalse<AsmCallInfo::kCPUIDRestriction>);
359     }
360 
361     // constructor_args_t here is used to generate a tuple of constructor args from the AsmCallInfo
362     // bindings. The tuple parameter pack will be expanded by the tuple specialization on the
363     // MachineInsn in machine_insn_intrinsics.h.
364     using MachineInsn = typename AsmCallInfo::template MachineInsn<berberis::x86_64::MachineInsn,
365                                                                    x86_64::constructor_args_t,
366                                                                    MachineOpcode>;
367     std::apply(MachineInsn::kGenFunc,
368                std::tuple_cat(std::tuple<x86_64::MachineIRBuilder&>{*builder_},
369                               UnwrapSimdReg(AsmCallInfo::template MakeTuplefromBindings<
370                                             TryBindingBasedInlineIntrinsicForHeavyOptimizer&>(
371                                   *this, asm_call_info))));
372     ProcessBindingsResults<AsmCallInfo>(type_wrapper<typename AsmCallInfo::Bindings>());
373     return true;
374   }
375 
376   template <typename ArgBinding, typename AsmCallInfo>
operator()377   auto /*MakeTuplefromBindingsClient*/ operator()(ArgTraits<ArgBinding>, AsmCallInfo) {
378     static constexpr const auto& arg_info = ArgTraits<ArgBinding>::arg_info;
379     if constexpr (arg_info.arg_type == ArgInfo::IMM_ARG) {
380       auto imm = std::get<arg_info.from>(input_args_);
381       return std::tuple{imm};
382     } else {
383       return ProcessArgInput<ArgBinding, AsmCallInfo>();
384     }
385   }
386 
387   template <typename ArgBinding, typename AsmCallInfo>
ProcessArgInput()388   auto ProcessArgInput() {
389     static constexpr const auto& arg_info = ArgTraits<ArgBinding>::arg_info;
390     using RegisterClass = typename ArgTraits<ArgBinding>::RegisterClass;
391     using Usage = typename ArgTraits<ArgBinding>::Usage;
392     static constexpr const auto kNumOut = std::tuple_size_v<typename AsmCallInfo::OutputArguments>;
393 
394     if constexpr (arg_info.arg_type == ArgInfo::IN_ARG) {
395       static_assert(std::is_same_v<Usage, intrinsics::bindings::Use>);
396       static_assert(!RegisterClass::kIsImplicitReg);
397       if constexpr (RegisterClass::kAsRegister == 'x' &&
398                     std::is_same_v<std::tuple_element_t<arg_info.from, std::tuple<ArgType...>>,
399                                    MachineReg>) {
400         auto xmm_reg = AllocVReg();
401         MovFromInput<RegisterClass>(builder_, xmm_reg, std::get<arg_info.from>(input_args_));
402         return std::tuple{xmm_reg};
403       } else {
404         return std::tuple{std::get<arg_info.from>(input_args_)};
405       }
406     } else if constexpr (arg_info.arg_type == ArgInfo::IN_OUT_ARG) {
407       static_assert(!std::is_same_v<ResType, std::monostate>);
408       static_assert(std::is_same_v<Usage, intrinsics::bindings::UseDef>);
409       static_assert(!RegisterClass::kIsImplicitReg);
410       if constexpr (RegisterClass::kAsRegister == 'x') {
411         if constexpr (kNumOut > 1) {
412           static_assert(kDependentTypeFalse<ArgTraits<ArgBinding>>);
413         } else {
414           CHECK(xmm_result_reg_.IsInvalidReg());
415           xmm_result_reg_ = AllocVReg();
416           MovFromInput<RegisterClass>(
417               builder_, xmm_result_reg_, std::get<arg_info.from>(input_args_));
418           return std::tuple{xmm_result_reg_};
419         }
420       } else if constexpr (kNumOut > 1) {
421         auto res = std::get<arg_info.to>(result_);
422         MovFromInput<RegisterClass>(builder_, res, std::get<arg_info.from>(input_args_));
423         return std::tuple{res};
424       } else {
425         MovFromInput<RegisterClass>(builder_, result_, std::get<arg_info.from>(input_args_));
426         return std::tuple{result_};
427       }
428     } else if constexpr (arg_info.arg_type == ArgInfo::IN_OUT_TMP_ARG) {
429       static_assert(!std::is_same_v<ResType, std::monostate>);
430       static_assert(std::is_same_v<Usage, intrinsics::bindings::UseDef>);
431       static_assert(RegisterClass::kIsImplicitReg);
432       if constexpr (kNumOut > 1) {
433         static_assert(kDependentTypeFalse<ArgTraits<ArgBinding>>);
434       } else {
435         CHECK(implicit_result_reg_.IsInvalidReg());
436         implicit_result_reg_ = AllocVReg();
437         MovFromInput<RegisterClass>(
438             builder_, implicit_result_reg_, std::get<arg_info.from>(input_args_));
439         return std::tuple{implicit_result_reg_};
440       }
441     } else if constexpr (arg_info.arg_type == ArgInfo::IN_TMP_ARG) {
442       if constexpr (RegisterClass::kIsImplicitReg) {
443         auto implicit_reg = AllocVReg();
444         MovFromInput<RegisterClass>(builder_, implicit_reg, std::get<arg_info.from>(input_args_));
445         return std::tuple{implicit_reg};
446       } else {
447         static_assert(std::is_same_v<Usage, intrinsics::bindings::UseDef>);
448         return std::tuple{std::get<arg_info.from>(input_args_)};
449       }
450     } else if constexpr (arg_info.arg_type == ArgInfo::OUT_TMP_ARG) {
451       if constexpr (kNumOut > 1) {
452         static_assert(kDependentTypeFalse<ArgTraits<ArgBinding>>);
453       } else {
454         CHECK(implicit_result_reg_.IsInvalidReg());
455         implicit_result_reg_ = AllocVReg();
456         return std::tuple{implicit_result_reg_};
457       }
458     } else if constexpr (arg_info.arg_type == ArgInfo::OUT_ARG) {
459       static_assert(!std::is_same_v<ResType, std::monostate>);
460       static_assert(std::is_same_v<Usage, intrinsics::bindings::Def> ||
461                     std::is_same_v<Usage, intrinsics::bindings::DefEarlyClobber>);
462       if constexpr (RegisterClass::kAsRegister == 'x') {
463         CHECK(xmm_result_reg_.IsInvalidReg());
464         xmm_result_reg_ = AllocVReg();
465         return std::tuple{xmm_result_reg_};
466       } else if constexpr (kNumOut > 1) {
467         return std::tuple{std::get<arg_info.to>(result_)};
468       } else if constexpr (RegisterClass::kIsImplicitReg) {
469         if constexpr (RegisterClass::kAsRegister == 0) {
470           return std::tuple{flag_register_};
471         } else {
472           CHECK(implicit_result_reg_.IsInvalidReg());
473           implicit_result_reg_ = AllocVReg();
474           return std::tuple{implicit_result_reg_};
475         }
476       } else {
477         return std::tuple{result_};
478       }
479     } else if constexpr (arg_info.arg_type == ArgInfo::TMP_ARG) {
480       static_assert(std::is_same_v<Usage, intrinsics::bindings::Def> ||
481                     std::is_same_v<Usage, intrinsics::bindings::DefEarlyClobber>);
482       if constexpr (RegisterClass::kAsRegister == 'm') {
483         static_assert(std::is_same_v<Usage, intrinsics::bindings::DefEarlyClobber>);
484         if (scratch_arg_ >= 2) {
485           FATAL("Only two scratch registers are supported for now");
486         }
487         return std::tuple{x86_64::kMachineRegRBP,
488                           static_cast<int32_t>(offsetof(ThreadState, intrinsics_scratch_area) +
489                                                config::kScratchAreaSlotSize * scratch_arg_++)};
490       } else if constexpr (RegisterClass::kIsImplicitReg) {
491         if constexpr (RegisterClass::kAsRegister == 0) {
492           return std::tuple{flag_register_};
493         } else {
494           auto implicit_reg = AllocVReg();
495           return std::tuple{implicit_reg};
496         }
497       } else {
498         auto reg = AllocVReg();
499         return std::tuple{reg};
500       }
501     } else {
502       static_assert(berberis::kDependentValueFalse<arg_info.arg_type>);
503     }
504   }
505 
506   template <typename T>
507   struct type_wrapper {
508     using type = T;
509   };
510 
511   template <typename AsmCallInfo, typename... ArgBinding>
ProcessBindingsResults(type_wrapper<std::tuple<ArgBinding...>>)512   void ProcessBindingsResults(type_wrapper<std::tuple<ArgBinding...>>) {
513     (ProcessBindingResult<ArgBinding, AsmCallInfo>(), ...);
514     if constexpr (std::tuple_size_v<typename AsmCallInfo::OutputArguments> == 0) {
515       // No return value. Do nothing.
516     } else if constexpr (std::tuple_size_v<typename AsmCallInfo::OutputArguments> == 1) {
517       using ReturnType = std::tuple_element_t<0, typename AsmCallInfo::OutputArguments>;
518       if constexpr (std::is_integral_v<ReturnType> && sizeof(ReturnType) < sizeof(int32_t)) {
519         // Don't handle these types just yet. We are not sure how to expand them and there
520         // are no examples.
521         static_assert(kDependentTypeFalse<ReturnType>);
522       }
523       if constexpr (std::is_same_v<ReturnType, int32_t> || std::is_same_v<ReturnType, uint32_t>) {
524         // Expands 32 bit values as signed. Even if actual results are processed as unsigned!
525         // TODO(b/308951522) replace with Expand node when it's created.
526         builder_->Gen<x86_64::MovsxlqRegReg>(result_, result_);
527       } else if constexpr (std::is_integral_v<ReturnType> &&
528                            sizeof(ReturnType) == sizeof(int64_t)) {
529         // Do nothing, we have already produced expanded value.
530       } else if constexpr (std::is_same_v<ReturnType, intrinsics::Float32> ||
531                            std::is_same_v<ReturnType, intrinsics::Float64>) {
532         // Do nothing, NaN boxing is handled by semantics player.
533       } else {
534         static_assert(kDependentTypeFalse<ReturnType>);
535       }
536     } else {
537       static_assert(kDependentTypeFalse<typename AsmCallInfo::OutputArguments>);
538     }
539   }
540 
541   template <typename ArgBinding, typename AsmCallInfo>
ProcessBindingResult()542   void ProcessBindingResult() {
543     if constexpr (ArgTraits<ArgBinding>::Class::kIsImmediate) {
544       return;
545     } else {
546       using RegisterClass = typename ArgTraits<ArgBinding>::RegisterClass;
547       static constexpr const auto& arg_info = ArgTraits<ArgBinding>::arg_info;
548       if constexpr (RegisterClass::kAsRegister == 'm' || RegisterClass::kAsRegister == 0) {
549         return;
550       } else if constexpr ((arg_info.arg_type == ArgInfo::IN_OUT_ARG ||
551                             arg_info.arg_type == ArgInfo::OUT_ARG) &&
552                            RegisterClass::kAsRegister == 'x') {
553         CHECK(!xmm_result_reg_.IsInvalidReg());
554         MovToResult<RegisterClass>(builder_, result_, xmm_result_reg_);
555       } else if constexpr ((arg_info.arg_type == ArgInfo::OUT_ARG ||
556                             arg_info.arg_type == ArgInfo::IN_OUT_TMP_ARG ||
557                             arg_info.arg_type == ArgInfo::OUT_TMP_ARG) &&
558                            RegisterClass::kIsImplicitReg) {
559         CHECK(!implicit_result_reg_.IsInvalidReg());
560         MovToResult<RegisterClass>(builder_, result_, implicit_result_reg_);
561       }
562     }
563   }
564 
AllocVReg()565   MachineReg AllocVReg() { return builder_->ir()->AllocVReg(); }
566 
567   template <typename T>
UnwrapSimdReg(T r)568   static constexpr auto UnwrapSimdReg(T r) {
569     if constexpr (std::is_same_v<T, SimdReg>) {
570       return r.machine_reg();
571     } else {
572       return r;
573     }
574   }
575 
576   template <typename... T>
UnwrapSimdReg(std::tuple<T...> regs)577   static constexpr auto UnwrapSimdReg(std::tuple<T...> regs) {
578     constexpr const auto num_args = std::tuple_size<std::tuple<T...>>::value;
579     return UnwrapSimdReg(std::make_index_sequence<num_args>(), regs);
580   }
581 
582   template <typename... T, auto... I>
UnwrapSimdReg(std::index_sequence<I...>,std::tuple<T...> regs)583   static constexpr auto UnwrapSimdReg(std::index_sequence<I...>, std::tuple<T...> regs) {
584     return std::make_tuple(UnwrapSimdReg(std::get<I>(regs))...);
585   }
586 
587  private:
588   x86_64::MachineIRBuilder* builder_;
589   ResType result_;
590   MachineReg xmm_result_reg_;
591   MachineReg implicit_result_reg_;
592   FlagRegister flag_register_;
593   std::tuple<ArgType...> input_args_;
594   uint32_t scratch_arg_ = 0;
595   bool success_;
596 };
597 
598 template <auto kFunction, typename ResType, typename FlagRegister, typename... ArgType>
TryInlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder * builder,ResType result,FlagRegister flag_register,ArgType...args)599 bool TryInlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder* builder,
600                                          ResType result,
601                                          FlagRegister flag_register,
602                                          ArgType... args) {
603   if (InlineIntrinsic<kFunction>::TryInlineWithHostRounding(
604           builder, result, flag_register, args...)) {
605     return true;
606   }
607 
608   return TryBindingBasedInlineIntrinsicForHeavyOptimizer<kFunction,
609                                                          ResType,
610                                                          FlagRegister,
611                                                          ArgType...>(
612       builder, result, flag_register, args...);
613 }
614 
615 template <auto kFunction, typename ResType, typename FlagRegister, typename... ArgType>
InlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder * builder,ResType result,FlagRegister flag_register,ArgType...args)616 void InlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder* builder,
617                                       ResType result,
618                                       FlagRegister flag_register,
619                                       ArgType... args) {
620   bool success = TryInlineIntrinsicForHeavyOptimizer<kFunction, ResType, FlagRegister, ArgType...>(
621       builder, result, flag_register, args...);
622   CHECK(success);
623 }
624 
625 template <auto kFunction, typename FlagRegister, typename... ArgType>
TryInlineIntrinsicForHeavyOptimizerVoid(x86_64::MachineIRBuilder * builder,FlagRegister flag_register,ArgType...args)626 bool TryInlineIntrinsicForHeavyOptimizerVoid(x86_64::MachineIRBuilder* builder,
627                                              FlagRegister flag_register,
628                                              ArgType... args) {
629   return TryBindingBasedInlineIntrinsicForHeavyOptimizer<kFunction,
630                                                          std::monostate,
631                                                          FlagRegister,
632                                                          ArgType...>(
633       builder, std::monostate{}, flag_register, args...);
634 }
635 
636 template <auto kFunction, typename FlagRegister, typename... ArgType>
InlineIntrinsicForHeavyOptimizerVoid(x86_64::MachineIRBuilder * builder,FlagRegister flag_register,ArgType...args)637 void InlineIntrinsicForHeavyOptimizerVoid(x86_64::MachineIRBuilder* builder,
638                                           FlagRegister flag_register,
639                                           ArgType... args) {
640   bool success = TryInlineIntrinsicForHeavyOptimizerVoid<kFunction, FlagRegister, ArgType...>(
641       builder, flag_register, args...);
642   CHECK(success);
643 }
644 
645 }  // namespace berberis
646 
647 #endif  // BERBERIS_HEAVY_OPTIMIZER_RISCV64_INLINE_INTRINSIC_H_
648