; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Byte-swap lowering test for the amdgcn target. Both invocations above feed
; the same set of check prefixes, so every expectation below must hold for
; both -mcpu values.
;
; Every kernel loads a value from %in, applies the matching llvm.bswap
; intrinsic and stores the result to %out.

declare i32 @llvm.bswap.i32(i32) nounwind readnone
declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>) nounwind readnone
declare i64 @llvm.bswap.i64(i64) nounwind readnone
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) nounwind readnone
declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>) nounwind readnone

; Scalar i32: the expected sequence is two rotates of the loaded value (by 8
; and by 24) merged by a bitfield insert under the 0x00ff00ff mask. The two
; rotates and the mask materialisation may appear in any order; the insert and
; the store must follow them.
; FUNC-LABEL: @test_bswap_i32
; SI: buffer_load_dword [[VAL:v[0-9]+]]
; SI-DAG: v_alignbit_b32 [[TMP0:v[0-9]+]], [[VAL]], [[VAL]], 8
; SI-DAG: v_alignbit_b32 [[TMP1:v[0-9]+]], [[VAL]], [[VAL]], 24
; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0xff00ff
; SI: v_bfi_b32 [[RESULT:v[0-9]+]], [[K]], [[TMP1]], [[TMP0]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %bswap = call i32 @llvm.bswap.i32(i32 %val) nounwind readnone
  store i32 %bswap, i32 addrspace(1)* %out, align 4
  ret void
}

; <2 x i32>: one rotate/rotate/insert triple is expected per element
; (2 triples), in any order.
; FUNC-LABEL: @test_bswap_v2i32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI: s_endpgm
define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
  %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
  %bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %val) nounwind readnone
  store <2 x i32> %bswap, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; <4 x i32>: 4 rotate/rotate/insert triples, in any order.
; FUNC-LABEL: @test_bswap_v4i32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI: s_endpgm
define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) nounwind {
  %val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
  %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) nounwind readnone
  store <4 x i32> %bswap, <4 x i32> addrspace(1)* %out, align 16
  ret void
}

; <8 x i32>: 8 rotate/rotate/insert triples, in any order.
; FUNC-LABEL: @test_bswap_v8i32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI: s_endpgm
define void @test_bswap_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) nounwind {
  %val = load <8 x i32>, <8 x i32> addrspace(1)* %in, align 32
  %bswap = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %val) nounwind readnone
  store <8 x i32> %bswap, <8 x i32> addrspace(1)* %out, align 32
  ret void
}

; The 64-bit variants below only verify that each kernel is emitted and runs
; to its end-of-program instruction. Their labels also close the preceding
; v8i32 block, so its unordered checks can no longer be satisfied by
; instructions that belong to these kernels.
; TODO(review): add instruction-level checks once the expected i64 expansion
; is confirmed against real llc output.

; FUNC-LABEL: @test_bswap_i64
; SI: s_endpgm
define void @test_bswap_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
  %val = load i64, i64 addrspace(1)* %in, align 8
  %bswap = call i64 @llvm.bswap.i64(i64 %val) nounwind readnone
  store i64 %bswap, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @test_bswap_v2i64
; SI: s_endpgm
define void @test_bswap_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) nounwind {
  %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
  %bswap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val) nounwind readnone
  store <2 x i64> %bswap, <2 x i64> addrspace(1)* %out, align 16
  ret void
}

; FUNC-LABEL: @test_bswap_v4i64
; SI: s_endpgm
define void @test_bswap_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) nounwind {
  %val = load <4 x i64>, <4 x i64> addrspace(1)* %in, align 32
  %bswap = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %val) nounwind readnone
  store <4 x i64> %bswap, <4 x i64> addrspace(1)* %out, align 32
  ret void
}