1# RUN: llc -march=amdgcn -mcpu=fiji -start-before=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s
2# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s
3
4# SDWA-LABEL: {{^}}add_f16_u32_preserve
#
# Test case add_f16_u32_preserve.
# Input: two dwords are loaded. The low word of the first and the high word
# of the second feed an f16 add whose result is shifted left by 8; byte 1 of
# the first and byte 3 of the second feed an f32 mul whose result is shifted
# left by 16. The two shifted results are merged with V_OR_B32 and stored.
# Expected output: the mul becomes an sdwa instruction writing WORD_1 with
# UNUSED_PAD, the add becomes an sdwa instruction writing BYTE_1 with
# UNUSED_PRESERVE, and the store consumes that register directly.
5
6# SDWA: flat_load_dword [[FIRST:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
7# SDWA: flat_load_dword [[SECOND:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
8
9# SDWA: v_mul_f32_sdwa [[RES:v[0-9]+]], [[FIRST]], [[SECOND]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_3
# The add must reuse the RES binding from the mul instead of rebinding it:
# with UNUSED_PRESERVE the add is expected to write the same register the mul
# produced, and rebinding would let any destination register pass.
10# SDWA: v_add_f16_sdwa [[RES]], [[FIRST]], [[SECOND]] dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:WORD_1
11
12# SDWA: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], [[RES]]
13
14---
15name:            add_f16_u32_preserve
16tracksRegLiveness: true
17registers:
18  - { id: 0, class: vreg_64 }
19  - { id: 1, class: vreg_64 }
20  - { id: 2, class: sreg_64 }
21  - { id: 3, class: vgpr_32 }
22  - { id: 4, class: vgpr_32 }
23  - { id: 5, class: vgpr_32 }
24  - { id: 6, class: vgpr_32 }
25  - { id: 7, class: vgpr_32 }
26  - { id: 8, class: vgpr_32 }
27  - { id: 9, class: vgpr_32 }
28  - { id: 10, class: vgpr_32 }
29  - { id: 11, class: vgpr_32 }
30  - { id: 12, class: vgpr_32 }
31  - { id: 13, class: vgpr_32 }
32body:             |
33  bb.0:
34    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
35
36    %2 = COPY $sgpr30_sgpr31
37    %1 = COPY $vgpr2_vgpr3
38    %0 = COPY $vgpr0_vgpr1
39    %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
40    %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
41
42    %5 = V_AND_B32_e32 65535, %3, implicit $exec
43    %6 = V_LSHRREV_B32_e64 16, %4, implicit $exec
44    %7 = V_BFE_U32 %3, 8, 8, implicit $exec
45    %8 = V_LSHRREV_B32_e32 24, %4, implicit $exec
46
47    %9 = V_ADD_F16_e64 0, %5, 0, %6, 0, 0, implicit $exec
48    %10 = V_LSHLREV_B16_e64 8, %9, implicit $exec
49    %11 = V_MUL_F32_e64 0, %7, 0, %8, 0, 0, implicit $exec
50    %12 = V_LSHLREV_B32_e64 16, %11, implicit $exec
51
52    %13 = V_OR_B32_e64 %10, %12, implicit $exec
53
54    FLAT_STORE_DWORD %0, %13, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
55    $sgpr30_sgpr31 = COPY %2
56    S_SETPC_B64_return $sgpr30_sgpr31
57
58---
59# SDWA-LABEL: {{^}}sdwa_preserve_keep
#
# Test case sdwa_preserve_keep.
# Input: the first loaded dword is masked with 255 and that result is the
# tied (preserved) operand of an existing V_MOV_B32_sdwa, whose trailing
# operands 5, 2, 4 print as dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE
# src0_sel:WORD_0 in the expected output below.
# Expected output: the v_and_b32 is kept and the sdwa mov writes into its
# result register. The 255 mask also clears byte 1, which lies outside the
# word the mov overwrites, so the masked value is not interchangeable with
# the raw load.
60# SDWA: flat_load_dword [[FIRST:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
61# SDWA: flat_load_dword [[SECOND:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
62
63# SDWA: v_and_b32_e32 [[AND:v[0-9]+]], 0xff, [[FIRST]]
64# SDWA: v_mov_b32_sdwa [[AND]], [[SECOND]] dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
65
66# SDWA: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], [[AND]]
67
68name:            sdwa_preserve_keep
69tracksRegLiveness: true
# NOTE(review): ids 5-8 are declared but never referenced in the body below;
# presumably left over from the reduced original - confirm before removing.
70registers:
71  - { id: 0, class: vreg_64 }
72  - { id: 1, class: vreg_64 }
73  - { id: 2, class: sreg_64 }
74  - { id: 3, class: vgpr_32 }
75  - { id: 4, class: vgpr_32 }
76  - { id: 5, class: sreg_32_xm0_xexec }
77  - { id: 6, class: vgpr_32 }
78  - { id: 7, class: vgpr_32 }
79  - { id: 8, class: sreg_32_xm0 }
80  - { id: 9, class: vgpr_32 }
81  - { id: 10, class: sreg_32_xm0 }
82  - { id: 11, class: vgpr_32 }
83  - { id: 17, class: vgpr_32 }
# NOTE(review): in the body, %2 and %9 are defined but never read.
84body:             |
85  bb.0:
86    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
87
88    %2 = COPY $sgpr30_sgpr31
89    %1 = COPY $vgpr2_vgpr3
90    %0 = COPY $vgpr0_vgpr1
91    %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
92    %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
93
94    %9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec
95    %10:sreg_32_xm0 = S_MOV_B32 255
96    %11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec
97    %17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0)
98    FLAT_STORE_DWORD %0, %17, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
99    S_ENDPGM
100
101...
102---
103# SDWA-LABEL: {{^}}sdwa_preserve_remove
#
# Test case sdwa_preserve_remove.
# Input: the first loaded dword is masked with 65535 and that result is the
# tied (preserved) operand of an existing V_MOV_B32_sdwa, whose trailing
# operands 5, 2, 4 print as dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE
# src0_sel:WORD_0 in the expected output below.
# Expected output: no v_and_b32 is matched; the sdwa mov writes straight
# into the register holding the first load. The 65535 mask only clears the
# upper word, which is exactly the word the mov overwrites.
104# SDWA: flat_load_dword [[FIRST:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
105# SDWA: flat_load_dword [[SECOND:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
106
107# SDWA: v_mov_b32_sdwa [[FIRST]], [[SECOND]] dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
108
109# SDWA: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], [[FIRST]]
110
111name:            sdwa_preserve_remove
112tracksRegLiveness: true
# NOTE(review): ids 5-8 are declared but never referenced in the body below;
# presumably left over from the reduced original - confirm before removing.
113registers:
114  - { id: 0, class: vreg_64 }
115  - { id: 1, class: vreg_64 }
116  - { id: 2, class: sreg_64 }
117  - { id: 3, class: vgpr_32 }
118  - { id: 4, class: vgpr_32 }
119  - { id: 5, class: sreg_32_xm0_xexec }
120  - { id: 6, class: vgpr_32 }
121  - { id: 7, class: vgpr_32 }
122  - { id: 8, class: sreg_32_xm0 }
123  - { id: 9, class: vgpr_32 }
124  - { id: 10, class: sreg_32_xm0 }
125  - { id: 11, class: vgpr_32 }
126  - { id: 17, class: vgpr_32 }
# NOTE(review): in the body, %2 and %9 are defined but never read.
127body:             |
128  bb.0:
129    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
130
131    %2 = COPY $sgpr30_sgpr31
132    %1 = COPY $vgpr2_vgpr3
133    %0 = COPY $vgpr0_vgpr1
134    %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
135    %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
136
137    %9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec
138    %10:sreg_32_xm0 = S_MOV_B32 65535
139    %11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec
140    %17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0)
141    FLAT_STORE_DWORD %0, %17, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
142    S_ENDPGM
143
144...
145