Lines Matching refs:GFX803

4 …attr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX803,NO-D16-HI %s
22 ; GFX803-LABEL: load_local_lo_v2i16_undeflo:
23 ; GFX803: ; %bb.0: ; %entry
24 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25 ; GFX803-NEXT: s_mov_b32 m0, -1
26 ; GFX803-NEXT: ds_read_u16 v0, v0
27 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
28 ; GFX803-NEXT: s_setpc_b64 s[30:31]
54 ; GFX803-LABEL: load_local_lo_v2i16_reglo:
55 ; GFX803: ; %bb.0: ; %entry
56 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
57 ; GFX803-NEXT: s_mov_b32 m0, -1
58 ; GFX803-NEXT: ds_read_u16 v0, v0
59 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
60 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
61 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
62 ; GFX803-NEXT: s_setpc_b64 s[30:31]
94 ; GFX803-LABEL: load_local_lo_v2i16_reglo_vreg:
95 ; GFX803: ; %bb.0: ; %entry
96 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97 ; GFX803-NEXT: s_mov_b32 m0, -1
98 ; GFX803-NEXT: ds_read_u16 v0, v0
99 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
100 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
101 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
102 ; GFX803-NEXT: flat_store_dword v[0:1], v0
103 ; GFX803-NEXT: s_waitcnt vmcnt(0)
104 ; GFX803-NEXT: s_setpc_b64 s[30:31]
131 ; GFX803-LABEL: load_local_lo_v2i16_zerolo:
132 ; GFX803: ; %bb.0: ; %entry
133 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134 ; GFX803-NEXT: s_mov_b32 m0, -1
135 ; GFX803-NEXT: ds_read_u16 v0, v0
136 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
137 ; GFX803-NEXT: s_setpc_b64 s[30:31]
164 ; GFX803-LABEL: load_local_lo_v2f16_fpimm:
165 ; GFX803: ; %bb.0: ; %entry
166 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
167 ; GFX803-NEXT: s_mov_b32 m0, -1
168 ; GFX803-NEXT: ds_read_u16 v0, v0
169 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
170 ; GFX803-NEXT: v_or_b32_e32 v0, 2.0, v0
171 ; GFX803-NEXT: s_setpc_b64 s[30:31]
200 ; GFX803-LABEL: load_local_lo_v2f16_reghi_vreg:
201 ; GFX803: ; %bb.0: ; %entry
202 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203 ; GFX803-NEXT: s_mov_b32 m0, -1
204 ; GFX803-NEXT: ds_read_u16 v0, v0
205 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
206 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
207 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
208 ; GFX803-NEXT: flat_store_dword v[0:1], v0
209 ; GFX803-NEXT: s_waitcnt vmcnt(0)
210 ; GFX803-NEXT: s_setpc_b64 s[30:31]
242 ; GFX803-LABEL: load_local_lo_v2f16_reglo_vreg:
243 ; GFX803: ; %bb.0: ; %entry
244 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245 ; GFX803-NEXT: s_mov_b32 m0, -1
246 ; GFX803-NEXT: ds_read_u16 v0, v0
247 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
248 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
249 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
250 ; GFX803-NEXT: flat_store_dword v[0:1], v0
251 ; GFX803-NEXT: s_waitcnt vmcnt(0)
252 ; GFX803-NEXT: s_setpc_b64 s[30:31]
282 ; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_zexti8:
283 ; GFX803: ; %bb.0: ; %entry
284 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
285 ; GFX803-NEXT: s_mov_b32 m0, -1
286 ; GFX803-NEXT: ds_read_u8 v0, v0
287 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v1
288 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
289 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
290 ; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4
291 ; GFX803-NEXT: flat_store_dword v[0:1], v0
292 ; GFX803-NEXT: s_waitcnt vmcnt(0)
293 ; GFX803-NEXT: s_setpc_b64 s[30:31]
326 ; GFX803-LABEL: load_local_lo_v2i16_reglo_vreg_zexti8:
327 ; GFX803: ; %bb.0: ; %entry
328 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
329 ; GFX803-NEXT: s_mov_b32 m0, -1
330 ; GFX803-NEXT: ds_read_u8 v0, v0
331 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
332 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
333 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
334 ; GFX803-NEXT: flat_store_dword v[0:1], v0
335 ; GFX803-NEXT: s_waitcnt vmcnt(0)
336 ; GFX803-NEXT: s_setpc_b64 s[30:31]
367 ; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_sexti8:
368 ; GFX803: ; %bb.0: ; %entry
369 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
370 ; GFX803-NEXT: s_mov_b32 m0, -1
371 ; GFX803-NEXT: ds_read_i8 v0, v0
372 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
373 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
374 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1…
375 ; GFX803-NEXT: flat_store_dword v[0:1], v0
376 ; GFX803-NEXT: s_waitcnt vmcnt(0)
377 ; GFX803-NEXT: s_setpc_b64 s[30:31]
410 ; GFX803-LABEL: load_local_lo_v2i16_reglo_vreg_sexti8:
411 ; GFX803: ; %bb.0: ; %entry
412 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
413 ; GFX803-NEXT: s_mov_b32 m0, -1
414 ; GFX803-NEXT: ds_read_i8 v0, v0
415 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
416 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
417 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1…
418 ; GFX803-NEXT: flat_store_dword v[0:1], v0
419 ; GFX803-NEXT: s_waitcnt vmcnt(0)
420 ; GFX803-NEXT: s_setpc_b64 s[30:31]
453 ; GFX803-LABEL: load_local_lo_v2f16_reglo_vreg_zexti8:
454 ; GFX803: ; %bb.0: ; %entry
455 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
456 ; GFX803-NEXT: s_mov_b32 m0, -1
457 ; GFX803-NEXT: ds_read_u8 v0, v0
458 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
459 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
460 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
461 ; GFX803-NEXT: flat_store_dword v[0:1], v0
462 ; GFX803-NEXT: s_waitcnt vmcnt(0)
463 ; GFX803-NEXT: s_setpc_b64 s[30:31]
497 ; GFX803-LABEL: load_local_lo_v2f16_reglo_vreg_sexti8:
498 ; GFX803: ; %bb.0: ; %entry
499 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
500 ; GFX803-NEXT: s_mov_b32 m0, -1
501 ; GFX803-NEXT: ds_read_i8 v0, v0
502 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
503 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
504 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1…
505 ; GFX803-NEXT: flat_store_dword v[0:1], v0
506 ; GFX803-NEXT: s_waitcnt vmcnt(0)
507 ; GFX803-NEXT: s_setpc_b64 s[30:31]
545 ; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lo:
546 ; GFX803: ; %bb.0: ; %entry
547 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
548 ; GFX803-NEXT: s_mov_b32 m0, -1
549 ; GFX803-NEXT: ds_read_u16 v0, v0
550 ; GFX803-NEXT: v_mov_b32_e32 v2, 0
551 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
552 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
553 ; GFX803-NEXT: ds_write_b16 v2, v0
554 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1…
555 ; GFX803-NEXT: flat_store_dword v[0:1], v0
556 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
557 ; GFX803-NEXT: s_setpc_b64 s[30:31]
594 ; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_hi:
595 ; GFX803: ; %bb.0: ; %entry
596 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
597 ; GFX803-NEXT: s_mov_b32 m0, -1
598 ; GFX803-NEXT: ds_read_u16 v0, v0
599 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v1
600 ; GFX803-NEXT: v_mov_b32_e32 v2, 0
601 ; GFX803-NEXT: ds_write_b16 v2, v1
602 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
603 ; GFX803-NEXT: s_waitcnt lgkmcnt(1)
604 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
605 ; GFX803-NEXT: flat_store_dword v[0:1], v0
606 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
607 ; GFX803-NEXT: s_setpc_b64 s[30:31]
646 ; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lohi:
647 ; GFX803: ; %bb.0: ; %entry
648 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
649 ; GFX803-NEXT: s_mov_b32 m0, -1
650 ; GFX803-NEXT: ds_read_u16 v0, v0
651 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v1
652 ; GFX803-NEXT: s_waitcnt lgkmcnt(0)
653 ; GFX803-NEXT: ds_write_b16 v2, v0
654 ; GFX803-NEXT: ds_write_b16 v3, v1
655 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
656 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1…
657 ; GFX803-NEXT: flat_store_dword v[0:1], v0
658 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
659 ; GFX803-NEXT: s_setpc_b64 s[30:31]
691 ; GFX803-LABEL: load_global_lo_v2i16_reglo_vreg:
692 ; GFX803: ; %bb.0: ; %entry
693 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
694 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0
695 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
696 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
697 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
698 ; GFX803-NEXT: s_waitcnt vmcnt(0)
699 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
700 ; GFX803-NEXT: flat_store_dword v[0:1], v0
701 ; GFX803-NEXT: s_waitcnt vmcnt(0)
702 ; GFX803-NEXT: s_setpc_b64 s[30:31]
734 ; GFX803-LABEL: load_global_lo_v2f16_reglo_vreg:
735 ; GFX803: ; %bb.0: ; %entry
736 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
737 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0
738 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
739 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
740 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
741 ; GFX803-NEXT: s_waitcnt vmcnt(0)
742 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
743 ; GFX803-NEXT: flat_store_dword v[0:1], v0
744 ; GFX803-NEXT: s_waitcnt vmcnt(0)
745 ; GFX803-NEXT: s_setpc_b64 s[30:31]
776 ; GFX803-LABEL: load_global_lo_v2i16_reglo_vreg_zexti8:
777 ; GFX803: ; %bb.0: ; %entry
778 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
779 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
780 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
781 ; GFX803-NEXT: flat_load_ubyte v0, v[0:1]
782 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v2
783 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
784 ; GFX803-NEXT: s_waitcnt vmcnt(0)
785 ; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4
786 ; GFX803-NEXT: flat_store_dword v[0:1], v0
787 ; GFX803-NEXT: s_waitcnt vmcnt(0)
788 ; GFX803-NEXT: s_setpc_b64 s[30:31]
820 ; GFX803-LABEL: load_global_lo_v2i16_reglo_vreg_sexti8:
821 ; GFX803: ; %bb.0: ; %entry
822 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
823 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
824 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
825 ; GFX803-NEXT: flat_load_sbyte v0, v[0:1]
826 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
827 ; GFX803-NEXT: s_waitcnt vmcnt(0)
828 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1…
829 ; GFX803-NEXT: flat_store_dword v[0:1], v0
830 ; GFX803-NEXT: s_waitcnt vmcnt(0)
831 ; GFX803-NEXT: s_setpc_b64 s[30:31]
864 ; GFX803-LABEL: load_global_lo_v2f16_reglo_vreg_zexti8:
865 ; GFX803: ; %bb.0: ; %entry
866 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
867 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
868 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
869 ; GFX803-NEXT: flat_load_ubyte v0, v[0:1]
870 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v2
871 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
872 ; GFX803-NEXT: s_waitcnt vmcnt(0)
873 ; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4
874 ; GFX803-NEXT: flat_store_dword v[0:1], v0
875 ; GFX803-NEXT: s_waitcnt vmcnt(0)
876 ; GFX803-NEXT: s_setpc_b64 s[30:31]
910 ; GFX803-LABEL: load_global_lo_v2f16_reglo_vreg_sexti8:
911 ; GFX803: ; %bb.0: ; %entry
912 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
913 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
914 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
915 ; GFX803-NEXT: flat_load_sbyte v0, v[0:1]
916 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
917 ; GFX803-NEXT: s_waitcnt vmcnt(0)
918 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1…
919 ; GFX803-NEXT: flat_store_dword v[0:1], v0
920 ; GFX803-NEXT: s_waitcnt vmcnt(0)
921 ; GFX803-NEXT: s_setpc_b64 s[30:31]
954 ; GFX803-LABEL: load_flat_lo_v2i16_reghi_vreg:
955 ; GFX803: ; %bb.0: ; %entry
956 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
957 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
958 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
959 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
960 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
961 ; GFX803-NEXT: flat_store_dword v[0:1], v0
962 ; GFX803-NEXT: s_waitcnt vmcnt(0)
963 ; GFX803-NEXT: s_setpc_b64 s[30:31]
994 ; GFX803-LABEL: load_flat_lo_v2f16_reghi_vreg:
995 ; GFX803: ; %bb.0: ; %entry
996 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
997 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
998 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
999 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1000 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1001 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1002 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1003 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1035 ; GFX803-LABEL: load_flat_lo_v2i16_reglo_vreg_zexti8:
1036 ; GFX803: ; %bb.0: ; %entry
1037 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1038 ; GFX803-NEXT: flat_load_ubyte v0, v[0:1]
1039 ; GFX803-NEXT: v_lshrrev_b32_e32 v2, 16, v2
1040 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
1041 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1042 ; GFX803-NEXT: v_perm_b32 v0, v2, v0, s4
1043 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1044 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1045 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1076 ; GFX803-LABEL: load_flat_lo_v2i16_reglo_vreg_sexti8:
1077 ; GFX803: ; %bb.0: ; %entry
1078 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1079 ; GFX803-NEXT: flat_load_sbyte v0, v[0:1]
1080 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1081 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1082 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1…
1083 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1084 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1085 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1117 ; GFX803-LABEL: load_flat_lo_v2f16_reglo_vreg_zexti8:
1118 ; GFX803: ; %bb.0: ; %entry
1119 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1120 ; GFX803-NEXT: flat_load_ubyte v0, v[0:1]
1121 ; GFX803-NEXT: v_lshrrev_b32_e32 v2, 16, v2
1122 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
1123 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1124 ; GFX803-NEXT: v_perm_b32 v0, v2, v0, s4
1125 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1126 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1127 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1160 ; GFX803-LABEL: load_flat_lo_v2f16_reglo_vreg_sexti8:
1161 ; GFX803: ; %bb.0: ; %entry
1162 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1163 ; GFX803-NEXT: flat_load_sbyte v0, v[0:1]
1164 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1165 ; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1166 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1…
1167 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1168 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1169 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1201 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg:
1202 ; GFX803: ; %bb.0: ; %entry
1203 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1204 ; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1205 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1206 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1207 ; GFX803-NEXT: v_or_b32_e32 v0, v1, v0
1208 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1209 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1210 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1252 ; GFX803-LABEL: load_private_lo_v2i16_reghi_vreg:
1253 ; GFX803: ; %bb.0: ; %entry
1254 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1255 ; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1256 ; GFX803-NEXT: v_lshlrev_b32_e32 v0, 16, v0
1257 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1258 ; GFX803-NEXT: v_or_b32_e32 v0, v1, v0
1259 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1260 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1261 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1304 ; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg:
1305 ; GFX803: ; %bb.0: ; %entry
1306 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1307 ; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1308 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1309 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1310 ; GFX803-NEXT: v_or_b32_e32 v0, v1, v0
1311 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1312 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1313 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1353 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_nooff:
1354 ; GFX803: ; %bb.0: ; %entry
1355 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1356 ; GFX803-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094
1357 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
1358 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1359 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1360 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1361 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1362 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1402 ; GFX803-LABEL: load_private_lo_v2i16_reghi_vreg_nooff:
1403 ; GFX803: ; %bb.0: ; %entry
1404 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1405 ; GFX803-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094
1406 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
1407 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1408 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1409 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1410 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1411 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1452 ; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_nooff:
1453 ; GFX803: ; %bb.0: ; %entry
1454 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1455 ; GFX803-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094
1456 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
1457 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1458 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1459 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1460 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1461 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1501 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8:
1502 ; GFX803: ; %bb.0: ; %entry
1503 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1504 ; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095
1505 ; GFX803-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1506 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
1507 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1508 ; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4
1509 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1510 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1511 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1552 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8:
1553 ; GFX803: ; %bb.0: ; %entry
1554 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1555 ; GFX803-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095
1556 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1557 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1558 ; GFX803-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1…
1559 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1560 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1561 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1602 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_zexti8:
1603 ; GFX803: ; %bb.0: ; %entry
1604 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1605 ; GFX803-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1606 ; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], 0 offset:4094
1607 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
1608 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1609 ; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4
1610 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1611 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1612 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1653 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_sexti8:
1654 ; GFX803: ; %bb.0: ; %entry
1655 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1656 ; GFX803-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 offset:4094
1657 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
1658 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1659 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1…
1660 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1661 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1662 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1704 ; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_nooff_zexti8:
1705 ; GFX803: ; %bb.0: ; %entry
1706 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1707 ; GFX803-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1708 ; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], 0 offset:4094
1709 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
1710 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1711 ; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4
1712 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1713 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1714 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1756 ; GFX803-LABEL: load_constant_lo_v2i16_reglo_vreg:
1757 ; GFX803: ; %bb.0: ; %entry
1758 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1759 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0
1760 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
1761 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
1762 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1763 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1764 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1765 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1766 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1767 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1799 ; GFX803-LABEL: load_constant_lo_v2f16_reglo_vreg:
1800 ; GFX803: ; %bb.0: ; %entry
1801 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1802 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0
1803 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
1804 ; GFX803-NEXT: flat_load_ushort v0, v[0:1]
1805 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1806 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1807 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
1808 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1809 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1810 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1842 ; GFX803-LABEL: load_constant_lo_v2f16_reglo_vreg_zexti8:
1843 ; GFX803: ; %bb.0: ; %entry
1844 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1845 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
1846 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
1847 ; GFX803-NEXT: flat_load_ubyte v0, v[0:1]
1848 ; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v2
1849 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
1850 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1851 ; GFX803-NEXT: v_perm_b32 v0, v1, v0, s4
1852 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1853 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1854 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1888 ; GFX803-LABEL: load_constant_lo_v2f16_reglo_vreg_sexti8:
1889 ; GFX803: ; %bb.0: ; %entry
1890 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1891 ; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
1892 ; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
1893 ; GFX803-NEXT: flat_load_sbyte v0, v[0:1]
1894 ; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
1895 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1896 ; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1…
1897 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1898 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1899 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1936 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_to_offset:
1937 ; GFX803: ; %bb.0: ; %entry
1938 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1939 ; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
1940 ; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32
1941 ; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094
1942 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1943 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1944 ; GFX803-NEXT: v_or_b32_e32 v0, v1, v0
1945 ; GFX803-NEXT: flat_store_dword v[0:1], v0
1946 ; GFX803-NEXT: s_waitcnt vmcnt(0)
1947 ; GFX803-NEXT: s_setpc_b64 s[30:31]
1997 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8_to_offset:
1998 ; GFX803: ; %bb.0: ; %entry
1999 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2000 ; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
2001 ; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32
2002 ; GFX803-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095
2003 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
2004 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2005 ; GFX803-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1…
2006 ; GFX803-NEXT: flat_store_dword v[0:1], v0
2007 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2008 ; GFX803-NEXT: s_setpc_b64 s[30:31]
2059 ; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8_to_offset:
2060 ; GFX803: ; %bb.0: ; %entry
2061 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2062 ; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
2063 ; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32
2064 ; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095
2065 ; GFX803-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2066 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
2067 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2068 ; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4
2069 ; GFX803-NEXT: flat_store_dword v[0:1], v0
2070 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2071 ; GFX803-NEXT: s_setpc_b64 s[30:31]
2123 ; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_sexti8_to_offset:
2124 ; GFX803: ; %bb.0: ; %entry
2125 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2126 ; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
2127 ; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32
2128 ; GFX803-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095
2129 ; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
2130 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2131 ; GFX803-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1…
2132 ; GFX803-NEXT: flat_store_dword v[0:1], v0
2133 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2134 ; GFX803-NEXT: s_setpc_b64 s[30:31]
2187 ; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_zexti8_to_offset:
2188 ; GFX803: ; %bb.0: ; %entry
2189 ; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2190 ; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
2191 ; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32
2192 ; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095
2193 ; GFX803-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2194 ; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
2195 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2196 ; GFX803-NEXT: v_perm_b32 v0, v0, v1, s4
2197 ; GFX803-NEXT: flat_store_dword v[0:1], v0
2198 ; GFX803-NEXT: s_waitcnt vmcnt(0)
2199 ; GFX803-NEXT: s_setpc_b64 s[30:31]