1from peachpy import *
2from peachpy.x86_64 import *
3
4import fp16.avx, fp16.avx2
5
6
# Kernel arguments used by both Function definitions in this file.
# "fp16" is declared as a pointer to const 16-bit unsigned elements (input).
arg_fp16 = Argument(
    ptr(const_uint16_t),
    name="fp16",
)
# "fp32" is declared as a pointer to 32-bit unsigned elements (output).
arg_fp32 = Argument(
    ptr(uint32_t),
    name="fp32",
)
9
# AVX2 kernel: load one XMM register from the `fp16` pointer, pass it through
# fp16.avx2.fp16_alt_xmm_to_fp32_ymm, and store the result at the `fp32`
# pointer. Presumably this widens packed half-precision values (in the
# "alt" representation) to single precision -- confirm against the helper.
with Function(
    "fp16_alt_xmm_to_fp32_ymm_peachpy__avx2",
    (arg_fp16, arg_fp32),
    target=uarch.default + isa.avx2,
):

    # Bring both pointer arguments into 64-bit general-purpose registers.
    src_ptr = GeneralPurposeRegister64()
    LOAD.ARGUMENT(src_ptr, arg_fp16)

    dst_ptr = GeneralPurposeRegister64()
    LOAD.ARGUMENT(dst_ptr, arg_fp32)

    # Unaligned load of a full XMM register from the source pointer.
    packed_halves = XMMRegister()
    VMOVUPS(packed_halves, [src_ptr])

    # The helper emits the conversion instructions and returns the register
    # holding the result, which is then stored (unaligned) to the destination.
    VMOVUPS([dst_ptr], fp16.avx2.fp16_alt_xmm_to_fp32_ymm(packed_halves))

    RETURN()
24
# AVX kernel: load one XMM register from the `fp16` pointer, pass it through
# fp16.avx.fp16_alt_xmm_to_fp32_xmm, and store the result at the `fp32`
# pointer. Presumably this widens packed half-precision values (in the
# "alt" representation) to single precision -- confirm against the helper.
with Function(
    "fp16_alt_xmm_to_fp32_xmm_peachpy__avx",
    (arg_fp16, arg_fp32),
    target=uarch.default + isa.avx,
):

    # Bring both pointer arguments into 64-bit general-purpose registers.
    src_ptr = GeneralPurposeRegister64()
    LOAD.ARGUMENT(src_ptr, arg_fp16)

    dst_ptr = GeneralPurposeRegister64()
    LOAD.ARGUMENT(dst_ptr, arg_fp32)

    # Unaligned load of a full XMM register from the source pointer.
    # NOTE(review): if the XMM-to-XMM helper only consumes the low half of
    # this register, the load reads more input than is needed -- confirm
    # that callers always provide a full 16 readable bytes.
    packed_halves = XMMRegister()
    VMOVUPS(packed_halves, [src_ptr])

    # The helper emits the conversion instructions and returns the register
    # holding the result, which is then stored (unaligned) to the destination.
    VMOVUPS([dst_ptr], fp16.avx.fp16_alt_xmm_to_fp32_xmm(packed_halves))

    RETURN()
39